Getting FFProbe Information With Python
Here is a technique that I think is both simple to use and easy to parse (tested with ffmpeg 3.x):
import subprocess
import xml.etree
def ffprobe(executable, filename):
    '''Runs ``ffprobe`` executable over ``filename``, returns parsed XML

    Parameters:
        executable (str): Full path leading to ``ffprobe``
        filename (str): Full path leading to the file to be probed

    Returns:
        xml.etree.ElementTree.Element: root element of the XML document
        printed by ``ffprobe``

    Raises:
        OSError: if ``executable`` cannot be started
        subprocess.CalledProcessError: if ``ffprobe`` exits with a non-zero
            status
        xml.etree.ElementTree.ParseError: if the output is not well-formed
            XML
    '''
    # ``import xml.etree`` alone does not load the ``ElementTree`` submodule,
    # so ``xml.etree.ElementTree`` may not exist as an attribute; import it
    # explicitly here so the function works regardless of other imports.
    import xml.etree.ElementTree as ElementTree

    cmd = [
        executable,
        '-v', 'quiet',
        '-print_format', 'xml',  # here is the trick: ask for XML output
        '-show_format',
        '-show_streams',
        filename,
    ]
    return ElementTree.fromstring(subprocess.check_output(cmd))
The data available comes from a string representation that looks like this:
<ffprobe>
<streams>
<stream index="0" codec_name="h264" codec_long_name="H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10" profile="Constrained Baseline" codec_type="video" codec_time_base="1/60" codec_tag_string="avc1" codec_tag="0x31637661" width="560" height="320" coded_width="560" coded_height="320" has_b_frames="0" sample_aspect_ratio="0:1" display_aspect_ratio="0:1" pix_fmt="yuv420p" level="30" color_range="tv" color_space="bt709" color_transfer="bt709" color_primaries="bt709" chroma_location="left" refs="1" is_avc="true" nal_length_size="4" r_frame_rate="30/1" avg_frame_rate="30/1" time_base="1/90000" start_pts="0" start_time="0.000000" duration_ts="498000" duration="5.533333" bit_rate="465641" bits_per_raw_sample="8" nb_frames="166">
<disposition default="1" dub="0" original="0" comment="0" lyrics="0" karaoke="0" forced="0" hearing_impaired="0" visual_impaired="0" clean_effects="0" attached_pic="0" timed_thumbnails="0"/>
<tag key="creation_time" value="2010-03-20T21:29:11.000000Z"/>
<tag key="language" value="und"/>
<tag key="encoder" value="JVT/AVC Coding"/>
</stream>
<stream>...</stream>
</streams>
<format filename="/Users/andre/Projects/qnap/librarian/librarian/data/movie.mp4" nb_streams="2" nb_programs="0" format_name="mov,mp4,m4a,3gp,3g2,mj2" format_long_name="QuickTime / MOV" start_time="0.000000" duration="5.568000" size="383631" bit_rate="551193" probe_score="100">
<tag key="major_brand" value="mp42"/>
<tag key="minor_version" value="0"/>
<tag key="compatible_brands" value="mp42isomavc1"/>
<tag key="creation_time" value="2010-03-20T21:29:11.000000Z"/>
<tag key="encoder" value="HandBrake 0.9.4 2009112300"/>
</format>
</ffprobe>
There is an FFProbe wrapper for Python (https://pypi.org/project/ffprobe/).
You can install it easily: (sudo) pip install ffprobe
A typical usage:
from ffprobe import FFProbe
metadata = FFProbe("example.mp4")
An alternative to FFProbe is the pymediainfo library (https://pymediainfo.readthedocs.io/en/stable/). You can find usage examples on its documentation site.
The JSON output from ffprobe
is simple enough to parse in Python without having to install a third-party library.
#!/usr/bin/env python
import argparse
import json
import subprocess
import sys
from pathlib import Path
from typing import NamedTuple
class FFProbeResult(NamedTuple):
    """Immutable record holding the outcome of one ``ffprobe`` run."""

    # Exit status of the process.
    return_code: int
    # Text captured from the process's standard output.
    json: str
    # Text captured from the process's standard error.
    error: str
def ffprobe(file_path) -> FFProbeResult:
    """Run ``ffprobe`` on ``file_path`` and capture what it prints.

    The ``ffprobe`` executable is looked up by name and asked for JSON
    output covering the container format and every stream. A non-zero
    exit status does not raise; callers should check ``return_code``.

    Parameters:
        file_path: Path of the media file to probe.

    Returns:
        FFProbeResult: exit status, captured stdout (the JSON text) and
        captured stderr, both decoded as text.
    """
    probe_options = (
        "-v", "quiet",
        "-print_format", "json",
        "-show_format",
        "-show_streams",
    )
    completed = subprocess.run(
        ["ffprobe", *probe_options, file_path],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
    )
    return FFProbeResult(
        completed.returncode,
        completed.stdout,
        completed.stderr,
    )
Example usage, echoing the resulting json string:
# Command-line entry point: probe the file given with -i/--input, echo the
# raw JSON report, then print a one-line summary per stream.
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='View ffprobe output')
    parser.add_argument('-i', '--input', help='File Name', required=True)
    args = parser.parse_args()
    if not Path(args.input).is_file():
        print("could not read file: " + args.input)
        # sys.exit instead of the interactive-only ``exit`` helper, which is
        # injected by the ``site`` module and not guaranteed to exist.
        sys.exit(1)
    print('File: {}'.format(args.input))
    ffprobe_result = ffprobe(file_path=args.input)
    if ffprobe_result.return_code == 0:
        # Print the raw json string
        print(ffprobe_result.json)
        # or print a summary of each stream
        d = json.loads(ffprobe_result.json)
        streams = d.get("streams", [])
        for stream in streams:
            print(f'{stream.get("codec_type", "unknown")}: {stream.get("codec_long_name")}')
    else:
        print("ERROR")
        print(ffprobe_result.error, file=sys.stderr)
So:
$ ./ffprobe.py -i input.mp4
File: input.mp4
{
"streams": [
{
"index": 0,
"codec_name": "h264",
"codec_long_name": "H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10",
"profile": "High",
"codec_type": "video",
"codec_time_base": "1001/48000",
"codec_tag_string": "avc1",
"codec_tag": "0x31637661",
"width": 1920,
"height": 1080,
"coded_width": 1920,
"coded_height": 1088,
"has_b_frames": 2,
"pix_fmt": "yuv420p",
"level": 40,
"color_range": "tv",
"color_space": "bt709",
"color_transfer": "bt709",
"color_primaries": "bt709",
"chroma_location": "left",
"refs": 1,
"is_avc": "true",
"nal_length_size": "4",
"r_frame_rate": "24000/1001",
"avg_frame_rate": "24000/1001",
"time_base": "1/24000",
"start_pts": 0,
"start_time": "0.000000",
"duration_ts": 442440,
"duration": "18.435000",
"bit_rate": "4970031",
"bits_per_raw_sample": "8",
"nb_frames": "442",
"disposition": {
"default": 1,
"dub": 0,
"original": 0,
"comment": 0,
"lyrics": 0,
"karaoke": 0,
"forced": 0,
"hearing_impaired": 0,
"visual_impaired": 0,
"clean_effects": 0,
"attached_pic": 0,
"timed_thumbnails": 0
},
"tags": {
"creation_time": "2020-05-20T15:37:59.000000Z",
"language": "und",
"handler_name": "L-SMASH Video Handler",
"encoder": "AVC Coding"
}
},
{
"index": 1,
"codec_name": "aac",
"codec_long_name": "AAC (Advanced Audio Coding)",
"profile": "LC",
"codec_type": "audio",
"codec_time_base": "1/48000",
"codec_tag_string": "mp4a",
"codec_tag": "0x6134706d",
"sample_fmt": "fltp",
"sample_rate": "48000",
"channels": 2,
"channel_layout": "stereo",
"bits_per_sample": 0,
"r_frame_rate": "0/0",
"avg_frame_rate": "0/0",
"time_base": "1/48000",
"start_pts": 0,
"start_time": "0.000000",
"duration_ts": 885760,
"duration": "18.453333",
"bit_rate": "137011",
"max_bit_rate": "140304",
"nb_frames": "865",
"disposition": {
"default": 1,
"dub": 0,
"original": 0,
"comment": 0,
"lyrics": 0,
"karaoke": 0,
"forced": 0,
"hearing_impaired": 0,
"visual_impaired": 0,
"clean_effects": 0,
"attached_pic": 0,
"timed_thumbnails": 0
},
"tags": {
"creation_time": "2020-05-20T15:37:59.000000Z",
"language": "und",
"handler_name": "L-SMASH Audio Handler"
}
}
],
"format": {
"filename": "input.mp4",
"nb_streams": 2,
"nb_programs": 0,
"format_name": "mov,mp4,m4a,3gp,3g2,mj2",
"format_long_name": "QuickTime / MOV",
"start_time": "0.000000",
"duration": "18.453333",
"size": "11779747",
"bit_rate": "5106826",
"probe_score": 100,
"tags": {
"major_brand": "mp42",
"minor_version": "0",
"compatible_brands": "mp42mp41isomavc1",
"creation_time": "2020-05-20T15:37:59.000000Z"
}
}
}
video: H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10
audio: AAC (Advanced Audio Coding)
There are a few problems in your code:
1. The args list passed to Popen has `-i filename` as its last argument, which is a syntax error; use `'-i ' + filename` instead.
2. `shell=True` is usually not needed and is an unnecessary burden.
Other than that, it seems to be working. Are you not seeing output after fixing #1?
Edit: It looks like you are having a problem with the ffprobe command line, so I installed it, and the changes you require are:
- My ffprobe (ffprobe 0.7.3-4:0.7.3-0ubuntu0.11.10.1) doesn't seem to accept the `-i` flag; the input file is just passed as the last argument.
- You need to pass `-loglevel` and its value `quiet` as separate arguments, i.e. `[..., '-loglevel', 'quiet', ...]`.
So after these changes here is a sample script:
#!/usr/bin/python
import os, sys, subprocess, shlex, re
from subprocess import call
def probe_file(filename):
    """Run ``ffprobe`` on ``filename`` and print what it reports.

    Prints the file name, then everything ``ffprobe`` wrote to stdout and,
    if anything was written to stderr, that as well.

    Parameters:
        filename (str): Path of the media file; passed to ``ffprobe`` as the
            last command-line argument.

    Raises:
        OSError: if the ``ffprobe`` executable cannot be started.
    """
    cmnd = ['ffprobe', '-show_format', '-pretty', '-loglevel', 'quiet', filename]
    # universal_newlines=True makes the pipes return text instead of bytes on
    # Python 3, so the output prints as-is rather than as a b'...' literal.
    p = subprocess.Popen(cmnd, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                         universal_newlines=True)
    # Single-argument print(...) is valid on both Python 2 and Python 3.
    print(filename)
    out, err = p.communicate()
    print("==========output==========")
    print(out)
    if err:
        print("========= error ========")
        print(err)
# Example invocation: probe a file named 'drop.avi'.
probe_file('drop.avi')
And I see the correct output:
==========output==========
[FORMAT]
filename=drop.avi
nb_streams=1
format_name=avi
format_long_name=AVI format
start_time=0:00:00.000000
duration=0:00:06.066667
size=660.000 Kibyte
bit_rate=891.217 Kbit/s
[/FORMAT]
========= error ========
ffprobe version 0.7.3-4:0.7.3-0ubuntu0.11.10.1, Copyright (c) 2007-2011 the Libav developers
built on Jan 4 2012 16:08:51 with gcc 4.6.1
configuration: --extra-version='4:0.7.3-0ubuntu0.11.10.1' --arch=amd64 --prefix=/usr --enable-vdpau --enable-bzlib --enable-libgsm --enable-libschroedinger --enable-libspeex --enable-libtheora --enable-libvorbis --enable-pthreads --enable-zlib --enable-libvpx --enable-runtime-cpudetect --enable-vaapi --enable-gpl --enable-postproc --enable-swscale --enable-x11grab --enable-libdc1394 --enable-shared --disable-static
libavutil 51. 7. 0 / 51. 7. 0
libavcodec 53. 6. 0 / 53. 6. 0
libavformat 53. 3. 0 / 53. 3. 0
libavdevice 53. 0. 0 / 53. 0. 0
libavfilter 2. 4. 0 / 2. 4. 0
libswscale 2. 0. 0 / 2. 0. 0
libpostproc 52. 0. 0 / 52. 0. 0
Unsupported codec with id 114 for input stream 0