Getting FFProbe Information With Python

Here is technique I think it is both simple to use and easy to parse (tested with ffmpeg 3.x):

import subprocess
import xml.etree

def ffprobe(executable, filename):
    '''Runs ``ffprobe`` executable over ``filename``, returns parsed XML

    Parameters:

        executable (str): Full path leading to ``ffprobe``
        filename (str): Full path leading to the file to be probed

    Returns:

        xml.etree.ElementTree: containing all parsed elements

    '''

    cmd = [
        executable,
        '-v', 'quiet',
        '-print_format', 'xml', #here is the trick
        '-show_format',
        '-show_streams',
        filename,
        ]

    return xml.etree.ElementTree.fromstring(subprocess.check_output(cmd))

The data available comes from a string representation that looks like this:

 <ffprobe>
   <streams>
     <stream index="0" codec_name="h264" codec_long_name="H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10" profile="Constrained Baseline" codec_type="video" codec_time_base="1/60" codec_tag_string="avc1" codec_tag="0x31637661" width="560" height="320" coded_width="560" coded_height="320" has_b_frames="0" sample_aspect_ratio="0:1" display_aspect_ratio="0:1" pix_fmt="yuv420p" level="30" color_range="tv" color_space="bt709" color_transfer="bt709" color_primaries="bt709" chroma_location="left" refs="1" is_avc="true" nal_length_size="4" r_frame_rate="30/1" avg_frame_rate="30/1" time_base="1/90000" start_pts="0" start_time="0.000000" duration_ts="498000" duration="5.533333" bit_rate="465641" bits_per_raw_sample="8" nb_frames="166">
       <disposition default="1" dub="0" original="0" comment="0" lyrics="0" karaoke="0" forced="0" hearing_impaired="0" visual_impaired="0" clean_effects="0" attached_pic="0" timed_thumbnails="0"/>
       <tag key="creation_time" value="2010-03-20T21:29:11.000000Z"/>
       <tag key="language" value="und"/>
       <tag key="encoder" value="JVT/AVC Coding"/>
     </stream>
     <stream>...</stream>
   </streams>
   <format filename="/Users/andre/Projects/qnap/librarian/librarian/data/movie.mp4" nb_streams="2" nb_programs="0" format_name="mov,mp4,m4a,3gp,3g2,mj2" format_long_name="QuickTime / MOV" start_time="0.000000" duration="5.568000" size="383631" bit_rate="551193" probe_score="100">
     <tag key="major_brand" value="mp42"/>
     <tag key="minor_version" value="0"/>
     <tag key="compatible_brands" value="mp42isomavc1"/>
     <tag key="creation_time" value="2010-03-20T21:29:11.000000Z"/>
     <tag key="encoder" value="HandBrake 0.9.4 2009112300"/>
   </format>
 </ffprobe>

There is FFProbe wrapper for Python (https://pypi.org/project/ffprobe/).

You can install it easily: (sudo) pip install ffprobe

A typical usage:

from ffprobe import FFProbe
metadata = FFProbe("example.mp4")

An alternative to FFProbe is pymediainfo library (https://pymediainfo.readthedocs.io/en/stable/). You can find usage in the documentation site.


The json output from ffprobe is simple enough to parse in python having to install a third-party library.

#!/usr/bin/env python

import argparse
import subprocess
import sys
from pathlib import Path
from typing import NamedTuple


class FFProbeResult(NamedTuple):
    return_code: int
    json: str
    error: str


def ffprobe(file_path) -> FFProbeResult:
    command_array = ["ffprobe",
                     "-v", "quiet",
                     "-print_format", "json",
                     "-show_format",
                     "-show_streams",
                     file_path]
    result = subprocess.run(command_array, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True)
    return FFProbeResult(return_code=result.returncode,
                         json=result.stdout,
                         error=result.stderr)

Example usage, echoing the resulting json string:

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='View ffprobe output')
    parser.add_argument('-i', '--input', help='File Name', required=True)
    args = parser.parse_args()
    if not Path(args.input).is_file():
        print("could not read file: " + args.input)
        exit(1)
    print('File:       {}'.format(args.input))
    ffprobe_result = ffprobe(file_path=args.input))
    if ffprobe_result.return_code == 0:
        # Print the raw json string
        print(ffprobe_result.json)

        # or print a summary of each stream
        d = json.loads(ffprobe_result.json)
        streams = d.get("streams", [])
        for stream in streams:
            print(f'{stream.get("codec_type", "unknown")}: {stream.get("codec_long_name")}')

    else:
        print("ERROR")
        print(ffprobe_result.error, file=sys.stderr)

So:

$ ./ffprobe.py -i input.mp4
File:       input.mp4
{
  "streams": [
    {
      "index": 0,
      "codec_name": "h264",
      "codec_long_name": "H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10",
      "profile": "High",
      "codec_type": "video",
      "codec_time_base": "1001/48000",
      "codec_tag_string": "avc1",
      "codec_tag": "0x31637661",
      "width": 1920,
      "height": 1080,
      "coded_width": 1920,
      "coded_height": 1088,
      "has_b_frames": 2,
      "pix_fmt": "yuv420p",
      "level": 40,
      "color_range": "tv",
      "color_space": "bt709",
      "color_transfer": "bt709",
      "color_primaries": "bt709",
      "chroma_location": "left",
      "refs": 1,
      "is_avc": "true",
      "nal_length_size": "4",
      "r_frame_rate": "24000/1001",
      "avg_frame_rate": "24000/1001",
      "time_base": "1/24000",
      "start_pts": 0,
      "start_time": "0.000000",
      "duration_ts": 442440,
      "duration": "18.435000",
      "bit_rate": "4970031",
      "bits_per_raw_sample": "8",
      "nb_frames": "442",
      "disposition": {
        "default": 1,
        "dub": 0,
        "original": 0,
        "comment": 0,
        "lyrics": 0,
        "karaoke": 0,
        "forced": 0,
        "hearing_impaired": 0,
        "visual_impaired": 0,
        "clean_effects": 0,
        "attached_pic": 0,
        "timed_thumbnails": 0
      },
      "tags": {
        "creation_time": "2020-05-20T15:37:59.000000Z",
        "language": "und",
        "handler_name": "L-SMASH Video Handler",
        "encoder": "AVC Coding"
      }
    },
    {
      "index": 1,
      "codec_name": "aac",
      "codec_long_name": "AAC (Advanced Audio Coding)",
      "profile": "LC",
      "codec_type": "audio",
      "codec_time_base": "1/48000",
      "codec_tag_string": "mp4a",
      "codec_tag": "0x6134706d",
      "sample_fmt": "fltp",
      "sample_rate": "48000",
      "channels": 2,
      "channel_layout": "stereo",
      "bits_per_sample": 0,
      "r_frame_rate": "0/0",
      "avg_frame_rate": "0/0",
      "time_base": "1/48000",
      "start_pts": 0,
      "start_time": "0.000000",
      "duration_ts": 885760,
      "duration": "18.453333",
      "bit_rate": "137011",
      "max_bit_rate": "140304",
      "nb_frames": "865",
      "disposition": {
        "default": 1,
        "dub": 0,
        "original": 0,
        "comment": 0,
        "lyrics": 0,
        "karaoke": 0,
        "forced": 0,
        "hearing_impaired": 0,
        "visual_impaired": 0,
        "clean_effects": 0,
        "attached_pic": 0,
        "timed_thumbnails": 0
      },
      "tags": {
        "creation_time": "2020-05-20T15:37:59.000000Z",
        "language": "und",
        "handler_name": "L-SMASH Audio Handler"
      }
    }
  ],
  "format": {
    "filename": "input.mp4",
    "nb_streams": 2,
    "nb_programs": 0,
    "format_name": "mov,mp4,m4a,3gp,3g2,mj2",
    "format_long_name": "QuickTime / MOV",
    "start_time": "0.000000",
    "duration": "18.453333",
    "size": "11779747",
    "bit_rate": "5106826",
    "probe_score": 100,
    "tags": {
      "major_brand": "mp42",
      "minor_version": "0",
      "compatible_brands": "mp42mp41isomavc1",
      "creation_time": "2020-05-20T15:37:59.000000Z"
    }
  }
}

video: H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10
audio: AAC (Advanced Audio Coding)

There are a few problems in your code:

  1. args list to Popen has last argument as -i filename which is a syntax error use '-i '+filename instead
  2. shell=True is usually not needed and is unnecessary burden.

Other than that it seems to be working, are you not seeing output after fixing #1 ?

Edit: Looks like you are having problem with ffprobe commandline, so I installed it and changes you require are:

  1. My ffprobe (ffprobe 0.7.3-4:0.7.3-0ubuntu0.11.10.1) doesn't seems to accept -i flag, input file is just passed as last argument.
  2. you need to pass -loglevel and option of loglevel quiet as separate arguments i.e. [..., '-loglevel', 'quiet',..]

So after these changes here is a sample script:

#!/usr/bin/python
import os, sys, subprocess, shlex, re
from subprocess import call
def probe_file(filename):
    cmnd = ['ffprobe', '-show_format', '-pretty', '-loglevel', 'quiet', filename]
    p = subprocess.Popen(cmnd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    print filename
    out, err =  p.communicate()
    print "==========output=========="
    print out
    if err:
        print "========= error ========"
        print err

probe_file('drop.avi')

And I see the correct output:

==========output==========
[FORMAT]
filename=drop.avi
nb_streams=1
format_name=avi
format_long_name=AVI format
start_time=0:00:00.000000
duration=0:00:06.066667
size=660.000 Kibyte
bit_rate=891.217 Kbit/s
[/FORMAT]

========= error ========
ffprobe version 0.7.3-4:0.7.3-0ubuntu0.11.10.1, Copyright (c) 2007-2011 the Libav developers
  built on Jan  4 2012 16:08:51 with gcc 4.6.1
  configuration: --extra-version='4:0.7.3-0ubuntu0.11.10.1' --arch=amd64 --prefix=/usr --enable-vdpau --enable-bzlib --enable-libgsm --enable-libschroedinger --enable-libspeex --enable-libtheora --enable-libvorbis --enable-pthreads --enable-zlib --enable-libvpx --enable-runtime-cpudetect --enable-vaapi --enable-gpl --enable-postproc --enable-swscale --enable-x11grab --enable-libdc1394 --enable-shared --disable-static
  libavutil    51.  7. 0 / 51.  7. 0
  libavcodec   53.  6. 0 / 53.  6. 0
  libavformat  53.  3. 0 / 53.  3. 0
  libavdevice  53.  0. 0 / 53.  0. 0
  libavfilter   2.  4. 0 /  2.  4. 0
  libswscale    2.  0. 0 /  2.  0. 0
  libpostproc  52.  0. 0 / 52.  0. 0
Unsupported codec with id 114 for input stream 0