See the question and my original answer on StackOverflow

First of all, the code you use is derived from this repo, https://github.com/smourier/MFDecodeH264, which demonstrated H264 decoding without a hardware transform.

Now, look at the remark in the original code:

You can check in the decoder attributes that MF_MT_FIXED_SIZE_SAMPLES is set to TRUE. Calling GetOutputStreamInfo will tell you the MFT cannot provide samples, as MFT_OUTPUT_STREAM_CAN_PROVIDE_SAMPLES and MFT_OUTPUT_STREAM_PROVIDES_SAMPLES are not set. So we don't have enough information yet: we'll feed input samples until we get MF_E_TRANSFORM_STREAM_CHANGE, and then we'll provide a sample as per the doc.

What happens is that, as soon as you set a valid D3D manager, this remark is not true anymore: if you call GetOutputStreamInfo, it will tell you that the MFT now has MFT_OUTPUT_STREAM_PROVIDES_SAMPLES set.

So in fact the code is easier: since we don't have to allocate any sample, we just need to make sure we release each one once we've finished with it. Here is a version that now works:

#include <windows.h>
#include <atlbase.h>
#include <mfapi.h>
#include <mferror.h>
#include <mfidl.h>
#include <cstdlib>
#include <d3d11.h>

#pragma comment(lib, "mfplat.lib")
#pragma comment(lib, "mfuuid.lib")
#pragma comment(lib, "d3d11.lib")

// HRCHECK(expr): evaluates `expr` once as an HRESULT. If FAILED(), prints the
// code (hex and decimal), the source line, the file and the expression text
// with wprintf, then calls _CrtDbgBreak(). It does NOT return or abort:
// execution continues with the statement after the macro.
#define HRCHECK(__expr) {auto __hr=(__expr);if(FAILED(__hr)){wprintf(L"FAILURE 0x%08X (%i)\n\tline: %u file: '%s'\n\texpr: '" _CRT_WIDE(#__expr) L"'\n",__hr,__hr,__LINE__,_CRT_WIDE(__FILE__));_CrtDbgBreak();}}
// WIN32CHECK(expr): evaluates `expr` once as a boolean. If it is false,
// converts GetLastError() to an HRESULT with HRESULT_FROM_WIN32 and reports it
// in the same format as HRCHECK, then calls _CrtDbgBreak(). Like HRCHECK, it
// only reports; execution continues afterwards.
#define WIN32CHECK(__expr) {if(!(__expr)){auto __hr=HRESULT_FROM_WIN32(GetLastError());{wprintf(L"FAILURE 0x%08X (%i)\n\tline: %u file: '%s'\n\texpr: '" _CRT_WIDE(#__expr) L"'\n",__hr,__hr,__LINE__,_CRT_WIDE(__FILE__));_CrtDbgBreak();}}}

// Selects, on output stream 0 of `transform`, the first available output media
// type whose MF_MT_SUBTYPE equals `format`.
//
// Returns:
//   - S_OK once the matching type has been set on the transform;
//   - the failing HRESULT of IMFTransform::SetOutputType if the transform
//     rejects the matching type (also reported through HRCHECK);
//   - the failing HRESULT of GetOutputAvailableType when enumeration stops
//     before a match is found (presumably MF_E_NO_MORE_TYPES once the list is
//     exhausted; see the IMFTransform documentation).
static HRESULT SetOutputType(IMFTransform* transform, GUID format)
{
  for (DWORD index = 0;; ++index)
  {
    CComPtr<IMFMediaType> candidate;
    const HRESULT enumResult = transform->GetOutputAvailableType(0, index, &candidate);
    if (FAILED(enumResult))
      return enumResult;

    GUID subtype{};
    if (SUCCEEDED(candidate->GetGUID(MF_MT_SUBTYPE, &subtype)) && subtype == format)
    {
      // HRCHECK only logs and breaks into the debugger; capture the result so
      // a rejected type is returned to the caller instead of a bogus S_OK.
      HRESULT setResult = S_OK;
      HRCHECK(setResult = transform->SetOutputType(0, candidate, 0));
      return setResult;
    }
  }
}

int main()
{
  HRCHECK(CoInitialize(nullptr));
  {
    HRCHECK(MFStartup(MF_VERSION));
    // open file
    auto file = CreateFile(L"xy.h264", GENERIC_READ, 0, nullptr, OPEN_EXISTING, 0, nullptr);
    WIN32CHECK(file != INVALID_HANDLE_VALUE);

    CComPtr<IMFTransform> decoder;
    HRCHECK(decoder.CoCreateInstance(CLSID_MSH264DecoderMFT));

    // input type is H264
    CComPtr<IMFMediaType> inputType;
    HRCHECK(MFCreateMediaType(&inputType));
    HRCHECK(inputType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video));
    HRCHECK(inputType->SetGUID(MF_MT_SUBTYPE, MFVideoFormat_H264_ES));
    HRCHECK(decoder->SetInputType(0, inputType, 0)); // video is id 0

    // get (and set) NV12 output type (could be I420, IYUV, YUY2, YV12)
    HRCHECK(SetOutputType(decoder, MFVideoFormat_NV12));

    CComPtr<ID3D11Device>device;
    HRCHECK(D3D11CreateDevice(nullptr, D3D_DRIVER_TYPE_HARDWARE, nullptr, D3D11_CREATE_DEVICE_VIDEO_SUPPORT | D3D11_CREATE_DEVICE_DEBUG, nullptr, 0, D3D11_SDK_VERSION, &device, nullptr, nullptr));

    UINT resetToken = 0;
    CComPtr<IMFDXGIDeviceManager> deviceManager;
    HRCHECK(MFCreateDXGIDeviceManager(&resetToken, &deviceManager));
    HRCHECK(deviceManager->ResetDevice(device, resetToken));
    HRCHECK(decoder->ProcessMessage(MFT_MESSAGE_SET_D3D_MANAGER, (ULONG_PTR)deviceManager.p));

    // At this point, after having set the D3D manager, calling GetOutputStreamInfo
    // will tell you the MFT provides samples as MFT_OUTPUT_STREAM_PROVIDES_SAMPLES is set

    do
    {
      // get a random chunk size between 500 and 1500 (simulate network)
      DWORD chunkSize = 500 + (1000 * (RAND_MAX - std::rand())) / RAND_MAX;

      // create an MF input buffer & read into it
      CComPtr<IMFMediaBuffer> inputBuffer;
      HRCHECK(MFCreateMemoryBuffer(chunkSize, &inputBuffer));
      BYTE* chunk;
      HRCHECK(inputBuffer->Lock(&chunk, nullptr, nullptr));
      DWORD read;
      WIN32CHECK(ReadFile(file, chunk, chunkSize, &read, nullptr));
      HRCHECK(inputBuffer->SetCurrentLength(read));
      HRCHECK(inputBuffer->Unlock());
      if (read)
      {
        CComPtr<IMFSample> inputSample;
        HRCHECK(MFCreateSample(&inputSample));
        HRCHECK(inputSample->AddBuffer(inputBuffer));

        auto hr = decoder->ProcessInput(0, inputSample, 0);
        if (hr != MF_E_NOTACCEPTING) // just go on
        {
          HRCHECK(hr);
        }
      }
      else
      {
        // end of file, ask decoder to process all data from previous calls
        HRCHECK(decoder->ProcessMessage(MFT_MESSAGE_COMMAND_DRAIN, 0));
      }

      MFT_OUTPUT_DATA_BUFFER outputBuffer{};
      DWORD status = 0;
      auto hr = decoder->ProcessOutput(0, 1, &outputBuffer, &status);
      if (hr == MF_E_TRANSFORM_NEED_MORE_INPUT) // just go on
      {
        if (!read) // file is all read
          break;

        continue;
      }

      if (hr == MF_E_TRANSFORM_STREAM_CHANGE) // just go on
        continue;

      HRCHECK(hr);

      // sample is allocated by transform, we just need to attach to it
      // so release will be automatic
      CComPtr<IMFSample> sample;
      sample.Attach(outputBuffer.pSample);

      LONGLONG time, duration;
      HRCHECK(outputBuffer.pSample->GetSampleTime(&time));
      HRCHECK(outputBuffer.pSample->GetSampleDuration(&duration));
      wprintf(L"Sample time: %I64u ms duration: %I64u ms\n", time / 10000, duration / 10000);
    } while (true);

    // close file
    CloseHandle(file);
    HRCHECK(MFShutdown());
  }
  CoUninitialize();
  return 0;
}