[meta-intel] [PATCH 1/1] libva-intel-driver: workaround for concurrent VC1 and H264 playback issue

tom.zanussi at intel.com tom.zanussi at intel.com
Thu Dec 20 12:39:18 PST 2012


From: Tom Zanussi <tom.zanussi at intel.com>

Explanation from Terence Chiang:

"encountered a GFX issue while enabling HW video playback VC-1 and
H.264 simultaneously, the graphic driver report error with gfx hang on
Sandy Bridge platform. We worked with Intel Linux graphic team and
provided a patch"

Signed-off-by: Tom Zanussi <tom.zanussi at intel.com>
---
 ...for-concurrently-playing-VC1-and-H264-vid.patch |  440 ++++++++++++++++++++
 .../libva/libva-intel-driver_1.0.18.bb             |    2 +
 2 files changed, 442 insertions(+), 0 deletions(-)
 create mode 100644 common/recipes-multimedia/libva/libva-intel-driver-1.0.18/0001-Workaround-for-concurrently-playing-VC1-and-H264-vid.patch

diff --git a/common/recipes-multimedia/libva/libva-intel-driver-1.0.18/0001-Workaround-for-concurrently-playing-VC1-and-H264-vid.patch b/common/recipes-multimedia/libva/libva-intel-driver-1.0.18/0001-Workaround-for-concurrently-playing-VC1-and-H264-vid.patch
new file mode 100644
index 0000000..e000632
--- /dev/null
+++ b/common/recipes-multimedia/libva/libva-intel-driver-1.0.18/0001-Workaround-for-concurrently-playing-VC1-and-H264-vid.patch
@@ -0,0 +1,440 @@
+Upstream-Status: Pending
+
+From 43c3fd3ea485a0b9ad12c248a0a94a959ab4d5ee Mon Sep 17 00:00:00 2001
+From: "Xiang, Haihao" <haihao.xiang at intel.com>
+Date: Mon, 29 Oct 2012 10:01:16 +0800
+Subject: [PATCH] Workaround for concurrently playing VC1 and H264 video on SNB
+
+Signed-off-by: Xiang, Haihao <haihao.xiang at intel.com>
+---
+ src/gen6_mfd.c |  379 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-
+ src/gen6_mfd.h |    3 +
+ 2 files changed, 380 insertions(+), 2 deletions(-)
+
+diff --git a/src/gen6_mfd.c b/src/gen6_mfd.c
+index fa2f128..b8c671b 100755
+--- a/src/gen6_mfd.c
++++ b/src/gen6_mfd.c
+@@ -50,6 +50,377 @@ static const uint32_t zigzag_direct[64] = {
+     53, 60, 61, 54, 47, 55, 62, 63
+ };
+ 
++/* Workaround for VC1 decoding */
++
++VAStatus 
++i965_DestroySurfaces(VADriverContextP ctx,
++                     VASurfaceID *surface_list,
++                     int num_surfaces);
++VAStatus 
++i965_CreateSurfaces(VADriverContextP ctx,
++                    int width,
++                    int height,
++                    int format,
++                    int num_surfaces,
++                    VASurfaceID *surfaces);
++
++static struct { /* canned MPEG-2 clip decoded by gen6_mfd_dwa() as the workaround */
++    int width;                  /* picture width in pixels (also the scratch surface width) */
++    int height;                 /* picture height in pixels (also the scratch surface height) */
++    int mb_count;               /* macroblock count emitted in MFD_MPEG2_BSD_OBJECT */
++    unsigned char data[32];     /* raw bitstream bytes; only the first data_size are uploaded */
++    int data_size;              /* number of bytes of data[] copied into the slice-data BO */
++    int data_bit_offset;        /* bit position in data[] where decoding starts (split into byte + bit part) */
++
++    unsigned int f_code:16;
++    unsigned int intra_dc_precision:2;
++    unsigned int picture_structure:2;
++    unsigned int top_field_first:1;
++    unsigned int frame_pred_frame_dct:1;
++    unsigned int concealment_motion_vectors:1;
++    unsigned int q_scale_type:1;
++    unsigned int intra_vlc_format:1;
++    unsigned int alternate_scan:1;
++    unsigned int picture_coding_type:1;
++    unsigned int pad0: 5;
++
++    unsigned int quantiser_scale_code;
++
++    unsigned char qm[2][64];    /* two quantiser matrices, emitted as MFX_MPEG2_QM_STATE 0 and 1 */
++} gen6_dwa_clip = {
++    .width = 32,
++    .height = 16,
++    .mb_count = 2,
++    .data = {
++        0x00,   0x00,   0x01,   0x01,   0x1b,   0xfb,   0xfd,   0xf8,
++        0x02,   0x97,   0xef,   0xf8,   0x8b,   0x97,   0xe0,   0x0a,
++        0x5f,   0xbf,   0xe2,   0x20,   0x00,   0x00,   0x01,   0x00
++    },
++    .data_size = 20,
++    .data_bit_offset = 38,
++
++    .f_code = 0xffff,
++    .intra_dc_precision = 0,
++    .picture_structure = 3,
++    .top_field_first = 0,
++    .frame_pred_frame_dct = 1,
++    .concealment_motion_vectors = 0,
++    .q_scale_type = 0,
++    .intra_vlc_format = 0,
++    .alternate_scan = 0,
++    .picture_coding_type = 1, /* I frame */
++
++    .quantiser_scale_code = 3,
++
++    .qm = {
++        {
++            8,      16,     19,     22,     26,     27,     29,     34,
++            16,     16,     22,     24,     27,     29,     34,     37,
++            19,     22,     26,     27,     29,     34,     34,     38,
++            22,     22,     26,     27,     29,     34,     37,     40,
++            22,     26,     27,     29,     32,     35,     40,     48,
++            26,     27,     29,     32,     35,     40,     48,     58,
++            26,     27,     29,     34,     38,     46,     56,     69,
++            27,     29,     35,     38,     46,     56,     69,     83
++        },
++
++        {
++            16,     16,     16,     16,     16,     16,     16,     16,
++            16,     16,     16,     16,     16,     16,     16,     16,
++            16,     16,     16,     16,     16,     16,     16,     16,
++            16,     16,     16,     16,     16,     16,     16,     16,
++            16,     16,     16,     16,     16,     16,     16,     16,
++            16,     16,     16,     16,     16,     16,     16,     16,
++            16,     16,     16,     16,     16,     16,     16,     16,
++            16,     16,     16,     16,     16,     16,     16,     16,
++        }
++    },
++};
++
++static void
++gen6_dwa_init(VADriverContextP ctx,
++              struct gen6_mfd_context *gen6_mfd_context)
++{
++    struct i965_driver_data *i965 = i965_driver_data(ctx);
++    VAStatus status;
++    struct object_surface *obj_surface;
++    /* (Re)create the scratch NV12 surface and the slice-data BO used by the decode workaround. */
++    if (gen6_mfd_context->dwa_surface_id != VA_INVALID_SURFACE)
++        i965_DestroySurfaces(ctx,
++                             &gen6_mfd_context->dwa_surface_id,
++                             1);
++
++    status = i965_CreateSurfaces(ctx,
++                                 gen6_dwa_clip.width,
++                                 gen6_dwa_clip.height,
++                                 VA_RT_FORMAT_YUV420,
++                                 1,
++                                 &gen6_mfd_context->dwa_surface_id);
++    assert(status == VA_STATUS_SUCCESS);
++
++    obj_surface = SURFACE(gen6_mfd_context->dwa_surface_id);
++    assert(obj_surface);
++    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
++    /* Drop the BO left by the previous call so it is not leaked; assumes the field starts out NULL. */
++    if (gen6_mfd_context->dwa_slice_data_bo)
++        dri_bo_unreference(gen6_mfd_context->dwa_slice_data_bo);
++
++    gen6_mfd_context->dwa_slice_data_bo = dri_bo_alloc(i965->intel.bufmgr,
++                                                       "WA data",
++                                                       0x1000,
++                                                       0x1000);
++    dri_bo_subdata(gen6_mfd_context->dwa_slice_data_bo,
++                   0,
++                   gen6_dwa_clip.data_size,
++                   gen6_dwa_clip.data);
++}
++
++static void
++gen6_dwa_pipe_mode_select(VADriverContextP ctx,
++                          struct gen6_mfd_context *gen6_mfd_context)
++{
++    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
++    /* Emit MFX_PIPE_MODE_SELECT: VLD-mode MPEG-2 decode with pre-deblocking output enabled. */
++    BEGIN_BCS_BATCH(batch, 4);
++    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (4 - 2));
++    OUT_BCS_BATCH(batch,
++                  (MFD_MODE_VLD << 16) | /* VLD mode */
++                  (0 << 10) | /* disable Stream-Out */
++                  (0 << 9)  | /* Post Deblocking Output */
++                  (1 << 8)  | /* Pre Deblocking Output */
++                  (0 << 7)  | /* disable TLB prefetch */
++                  (0 << 5)  | /* not in stitch mode */
++                  (MFX_CODEC_DECODE << 4)  | /* decoding mode */
++                  (MFX_FORMAT_MPEG2 << 0));
++    OUT_BCS_BATCH(batch,
++                  (0 << 20) | /* round flag in PB slice */
++                  (0 << 19) | /* round flag in Intra8x8 */
++                  (0 << 7)  | /* expand NOA bus flag */
++                  (1 << 6)  | /* must be 1 */
++                  (0 << 5)  | /* disable clock gating for NOA */
++                  (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
++                  (0 << 3)  | /* terminate if AVC mbdata error occurs */
++                  (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
++                  (0 << 1)  | /* AVC long field motion vector */
++                  (0 << 0));  /* always calculate AVC ILDB boundary strength */
++    OUT_BCS_BATCH(batch, 0);
++    ADVANCE_BCS_BATCH(batch);
++}
++
++static void
++gen6_dwa_surface_state(VADriverContextP ctx,
++                       struct gen6_mfd_context *gen6_mfd_context)
++{
++    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
++    struct i965_driver_data *i965 = i965_driver_data(ctx);
++    struct object_surface *obj_surface = SURFACE(gen6_mfd_context->dwa_surface_id);
++
++    BEGIN_BCS_BATCH(batch, 6);
++    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
++    OUT_BCS_BATCH(batch, 0);
++    OUT_BCS_BATCH(batch,
++                  ((obj_surface->orig_width - 1) << 19) |
++                  ((obj_surface->orig_height - 1) << 6));
++    OUT_BCS_BATCH(batch,
++                  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
++                  (1 << 27) | /* interleave chroma */
++                  (0 << 22) | /* surface object control state, ignored */
++                  ((obj_surface->width - 1) << 3) | /* pitch */
++                  (0 << 2)  | /* must be 0 */
++                  (1 << 1)  | /* must be tiled */
++                  (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
++    OUT_BCS_BATCH(batch,
++                  (0 << 16) | /* X offset for U(Cb), must be 0 */
++                  (obj_surface->y_cb_offset << 0)); /* Y offset for U(Cb) */
++    OUT_BCS_BATCH(batch,
++                  (0 << 16) | /* X offset for V(Cr), must be 0 */
++                  (0 << 0)); /* Y offset for V(Cr), must be 0 for video codec */
++    ADVANCE_BCS_BATCH(batch);
++}
++
++static void
++gen6_dwa_pipe_buf_addr_state(VADriverContextP ctx,
++                             struct gen6_mfd_context *gen6_mfd_context)
++{
++    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
++    struct i965_driver_data *i965 = i965_driver_data(ctx);
++    struct object_surface *obj_surface = SURFACE(gen6_mfd_context->dwa_surface_id);
++    dri_bo *intra_bo;
++    int i;
++
++    intra_bo = dri_bo_alloc(i965->intel.bufmgr,
++                            "intra row store",
++                            128 * 64,
++                            0x1000);
++
++    BEGIN_BCS_BATCH(batch, 24);
++    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (24 - 2));
++    OUT_BCS_RELOC(batch,
++                  obj_surface->bo,
++                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
++                  0);
++    
++    OUT_BCS_BATCH(batch, 0); /* post deblocking */
++
++    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
++    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
++
++    OUT_BCS_RELOC(batch,
++                  intra_bo,
++                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
++                  0);
++
++    OUT_BCS_BATCH(batch, 0);
++
++    /* DW 7..22 */
++    for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
++        OUT_BCS_BATCH(batch, 0);
++    }
++
++    OUT_BCS_BATCH(batch, 0);   /* ignore DW23 for decoding */
++    ADVANCE_BCS_BATCH(batch);
++
++    dri_bo_unreference(intra_bo);
++}
++
++static void
++gen6_dwa_bsp_buf_base_addr_state(VADriverContextP ctx,
++                                 struct gen6_mfd_context *gen6_mfd_context)
++{
++    struct i965_driver_data *i965 = i965_driver_data(ctx);
++    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
++    dri_bo *bsd_mpc_bo;
++
++    bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
++                              "bsd mpc row store",
++                              11520, /* 1.5 * 120 * 64 */
++                              0x1000);
++
++    BEGIN_BCS_BATCH(batch, 4);
++    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
++
++    OUT_BCS_RELOC(batch,
++                  bsd_mpc_bo,
++                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
++                  0);
++    OUT_BCS_BATCH(batch, 0);
++    OUT_BCS_BATCH(batch, 0);
++    ADVANCE_BCS_BATCH(batch);
++
++    dri_bo_unreference(bsd_mpc_bo);
++}
++
++static void
++gen6_dwa_mpeg2_pic_state(VADriverContextP ctx,
++                         struct gen6_mfd_context *gen6_mfd_context)
++
++{
++    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
++    unsigned int width_in_mbs = ALIGN(gen6_dwa_clip.width, 16) / 16;
++    unsigned int height_in_mbs = ALIGN(gen6_dwa_clip.height, 16) / 16;
++
++    BEGIN_BCS_BATCH(batch, 4);
++    OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (4 - 2));
++    OUT_BCS_BATCH(batch,
++                  gen6_dwa_clip.f_code << 16 |
++                  gen6_dwa_clip.intra_dc_precision << 14 |
++                  gen6_dwa_clip.picture_structure << 12 |
++                  gen6_dwa_clip.top_field_first << 11 |
++                  gen6_dwa_clip.frame_pred_frame_dct << 10 |
++                  gen6_dwa_clip.concealment_motion_vectors << 9 |
++                  gen6_dwa_clip.q_scale_type << 8 |
++                  gen6_dwa_clip.intra_vlc_format << 7 | 
++                  gen6_dwa_clip.alternate_scan << 6);
++    OUT_BCS_BATCH(batch,
++                  gen6_dwa_clip.picture_coding_type << 9);
++    OUT_BCS_BATCH(batch,
++                  height_in_mbs << 16 |
++                  width_in_mbs);
++    ADVANCE_BCS_BATCH(batch);
++}
++
++static void
++gen6_dwa_mpeg2_qm_state(VADriverContextP ctx,
++                        struct gen6_mfd_context *gen6_mfd_context)
++{
++    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
++    int i;
++
++    for (i = 0; i < 2; i++) {
++        BEGIN_BCS_BATCH(batch, 18);
++        OUT_BCS_BATCH(batch, MFX_MPEG2_QM_STATE | (18 - 2));
++        OUT_BCS_BATCH(batch, i);
++        intel_batchbuffer_data(batch, gen6_dwa_clip.qm[i], 64);
++        ADVANCE_BCS_BATCH(batch);
++    }
++}
++
++static void
++gen6_dwa_ind_obj_base_addr_state(VADriverContextP ctx,
++                                 struct gen6_mfd_context *gen6_mfd_context)
++{
++    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
++
++    BEGIN_BCS_BATCH(batch, 11);
++    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
++    OUT_BCS_RELOC(batch,
++                  gen6_mfd_context->dwa_slice_data_bo,
++                  I915_GEM_DOMAIN_INSTRUCTION, 0,
++                  0);
++    OUT_BCS_BATCH(batch, 0);
++    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
++    OUT_BCS_BATCH(batch, 0);
++    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
++    OUT_BCS_BATCH(batch, 0);
++    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
++    OUT_BCS_BATCH(batch, 0);
++    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
++    OUT_BCS_BATCH(batch, 0);
++    ADVANCE_BCS_BATCH(batch);
++}
++
++static void
++gen6_dwa_mpeg2_bsd_object(VADriverContextP ctx,
++                          struct gen6_mfd_context *gen6_mfd_context)
++{
++    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
++
++    BEGIN_BCS_BATCH(batch, 5);
++    OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
++    OUT_BCS_BATCH(batch, 
++                  gen6_dwa_clip.data_size - (gen6_dwa_clip.data_bit_offset >> 3));
++    OUT_BCS_BATCH(batch, gen6_dwa_clip.data_bit_offset >> 3);
++    OUT_BCS_BATCH(batch,
++                  (0 << 24) |
++                  (0 << 16) |
++                  (gen6_dwa_clip.mb_count << 8) |
++                  (1 << 5) |
++                  (1 << 3) |
++                  (gen6_dwa_clip.data_bit_offset & 0x7));
++    OUT_BCS_BATCH(batch,
++                  gen6_dwa_clip.quantiser_scale_code << 24);
++    ADVANCE_BCS_BATCH(batch);
++}
++
++static void
++gen6_mfd_dwa(VADriverContextP ctx,
++             struct gen6_mfd_context *gen6_mfd_context)
++{
++    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
++    gen6_dwa_init(ctx, gen6_mfd_context);
++    intel_batchbuffer_emit_mi_flush(batch);
++    gen6_dwa_pipe_mode_select(ctx, gen6_mfd_context);
++    gen6_dwa_surface_state(ctx, gen6_mfd_context);
++    gen6_dwa_pipe_buf_addr_state(ctx, gen6_mfd_context);
++    gen6_dwa_bsp_buf_base_addr_state(ctx, gen6_mfd_context);
++    gen6_dwa_mpeg2_qm_state(ctx, gen6_mfd_context);
++    gen6_dwa_mpeg2_pic_state(ctx, gen6_mfd_context);
++    gen6_dwa_ind_obj_base_addr_state(ctx, gen6_mfd_context);
++    gen6_dwa_mpeg2_bsd_object(ctx, gen6_mfd_context);
++}
++
++/* end of workaround */
++
+ static void
+ gen6_mfd_avc_frame_store_index(VADriverContextP ctx,
+                                VAPictureParameterBufferH264 *pic_param,
+@@ -1055,7 +1426,8 @@ gen6_mfd_avc_decode_picture(VADriverContextP ctx,
+         }
+     }
+     
+-    gen6_mfd_avc_phantom_slice(ctx, pic_param, gen6_mfd_context);
++    gen6_mfd_dwa(ctx, gen6_mfd_context);
++
+     intel_batchbuffer_end_atomic(batch);
+     intel_batchbuffer_flush(batch);
+ }
+@@ -1944,6 +2316,8 @@ gen6_mfd_vc1_decode_picture(VADriverContextP ctx,
+         }
+     }
+ 
++    gen6_mfd_dwa(ctx, gen6_mfd_context);
++
+     intel_batchbuffer_end_atomic(batch);
+     intel_batchbuffer_flush(batch);
+ }
+@@ -2031,6 +2405,7 @@ gen6_dec_hw_context_init(VADriverContextP ctx, VAProfile profile)
+     }
+ 
+     gen6_mfd_context->wa_mpeg2_slice_vertical_position = -1;
+-    
++    gen6_mfd_context->dwa_surface_id = VA_INVALID_ID;
++
+     return (struct hw_context *)gen6_mfd_context;
+ }
+diff --git a/src/gen6_mfd.h b/src/gen6_mfd.h
+index de131d6..7c4a619 100644
+--- a/src/gen6_mfd.h
++++ b/src/gen6_mfd.h
+@@ -72,6 +72,9 @@ struct gen6_mfd_context
+     GenBuffer           bitplane_read_buffer;
+ 
+     int                 wa_mpeg2_slice_vertical_position;
++
++    VASurfaceID dwa_surface_id;
++    dri_bo *dwa_slice_data_bo;
+ };
+ 
+ #endif /* _GEN6_MFD_H_ */
+-- 
+1.7.9.5
+
diff --git a/common/recipes-multimedia/libva/libva-intel-driver_1.0.18.bb b/common/recipes-multimedia/libva/libva-intel-driver_1.0.18.bb
index fcd3b6f..d670b47 100644
--- a/common/recipes-multimedia/libva/libva-intel-driver_1.0.18.bb
+++ b/common/recipes-multimedia/libva/libva-intel-driver_1.0.18.bb
@@ -4,5 +4,7 @@ PR = "${INC_PR}.0"
 
 SRC_URI = "http://www.freedesktop.org/software/vaapi/releases/libva-intel-driver/libva-intel-driver-${PV}.tar.bz2"
 
+SRC_URI += "file://0001-Workaround-for-concurrently-playing-VC1-and-H264-vid.patch"
+
 SRC_URI[md5sum] = "13907085223d88d956cdfc282962b7a7"
 SRC_URI[sha256sum] = "789fa2d6e22b9028ce12a89981eb33e57b04301431415149acfb61a49d3a63ee"
-- 
1.7.4.1




More information about the meta-intel mailing list