How can I left justify text in a pandas DataFrame column in an IPython notebook
This works on Python 3.7 (functools is a part of that release now)
# pylint: disable=C0103,C0200,R0205
from __future__ import print_function
import pandas as pd
import functools
@staticmethod
def displayDataFrame(dataframe, displayNumRows=True, displayIndex=True, leftJustify=True):
    # type: (pd.DataFrame, bool, bool, bool) -> None
    """
    Print a DataFrame as plain text, optionally padding the values of every
    column on the right so that they line up left-justified.

    :param dataframe: pandas DataFrame
    :param displayNumRows: If True, show the number of rows in the output.
    :param displayIndex: If True, then show the indexes
    :param leftJustify: If True, then use technique to format columns left justified.
    :return: None
    """
    if leftJustify:
        formatters = {}
        for columnName in dataframe.columns:
            column = dataframe[columnName]
            columnDtype = column.dtype
            # Dispatch on the dtype of the column's *data*; the type of the
            # column label says nothing about how the values should be padded.
            if pd.api.types.is_bool_dtype(columnDtype):
                form = "{!s:<8}"
            elif pd.api.types.is_float_dtype(columnDtype):
                form = "{!s:<5}"
            else:
                # Measure the textual width of every value so that integer,
                # datetime and mixed columns work as well as string columns.
                longest = column.astype(str).str.len().max()
                # max() is NaN for an empty / all-missing column; it must also
                # be an int, because "{:<11.0}" would be read as precision 0.
                width = 0 if pd.isna(longest) else int(longest)
                form = "{{!s:<{}}}".format(width)
            formatters[columnName] = form.format
        print(dataframe.to_string(index=displayIndex, formatters=formatters), end="\n\n")
    else:
        print(dataframe.to_string(index=displayIndex), end="\n\n")
    if displayNumRows:
        print("Num Rows: {}".format(len(dataframe)), end="\n\n")
I like @unutbu's answer (not requiring any additional dependencies). @JS.'s additions are a step in the right direction (towards something re-usable).
Since the construction of the formatter dict is the difficult part, let's create a function which creates the formatter dict from a DataFrame and an optional list of columns to format.
def make_lalign_formatter(df, cols=None):
    """
    Construct formatter dict to left-align columns.

    Each formatter pads a string on the right to the length of the longest
    string in its column, so the rendered values line up on the left.

    Parameters
    ----------
    df : pandas.core.frame.DataFrame
        The DataFrame to format
    cols : None or iterable of strings, optional
        The columns of df to left-align. The default, cols=None, will
        left-align all the columns of dtype object (columns using pandas'
        dedicated string dtype are included as well)

    Returns
    -------
    dict
        Formatter dictionary mapping column name to a ``str.format`` callable
    """
    if cols is None:
        cols = [col for col, dtype in zip(df.columns, df.dtypes)
                if pd.api.types.is_string_dtype(dtype)]
    formatters = {}
    for col in cols:
        longest = df[col].str.len().max()
        # With missing values the lengths are floats (e.g. 6.0), and an
        # empty/all-missing column yields NaN. Interpolating either into the
        # format spec breaks it ('{:<6.0s}' means precision 0), so normalise
        # to a plain int first.
        width = 0 if pd.isna(longest) else int(longest)
        formatters[col] = f'{{:<{width}s}}'.format
    return formatters
Let's create some example data to demonstrate using this function:
import pandas as pd
# Sample records, stored column by column
data = {
    'First': ['Tom', 'Dick', 'Harry'],
    'Last': ['Thumb', 'Whittington', 'Potter'],
    'Age': [183, 667, 23],
}
# Wrap the column dict in a DataFrame
df = pd.DataFrame(data=data)
To align all the columns of type object in our DataFrame:
# Left-align every column chosen by the default selection (cols=None)
lalign_all = make_lalign_formatter(df)
print(df.to_string(formatters=lalign_all, index=False, justify='left'))
To align only the 'First' column:
# Restrict the left-alignment to the 'First' column
lalign_first = make_lalign_formatter(df, cols=['First'])
print(df.to_string(formatters=lalign_first, index=False, justify='left'))
You could use `a['Text'].str.len().max()` to compute the length of the longest string in `a['Text']`, and use that number, `N`, in a left-justified formatter `'{:<Ns}'.format`:
In [211]: print(a.to_string(formatters={'Text':'{{:<{}s}}'.format(a['Text'].str.len().max()).format}, index=False))
Text Value
abcdef 12.34
x 4.20
If you're willing to use another library, tabulate will do this -
$ pip install tabulate
and then
from tabulate import tabulate
# Two-column sample frame: text labels and float values
df = pd.DataFrame({'Text': ['abcdef', 'x'], 'Value': [12.34, 4.2]})
# Render without the index, using the column labels as the header row
rendered = tabulate(df, headers=df.columns, showindex=False)
print(rendered)
Text Value
------ -------
abcdef 12.34
x 4.2
It has various other output formats also.