Pandas Bad Lines Warning Capture
I don't think pandas implements a way to capture these warnings directly.
source1, source2
My solutions:
1. Pre- or post-processing
import pandas as pd
import csv
# Parse the file while skipping malformed rows (error_bad_lines=False) and
# asking pandas to emit a warning for each one (warn_bad_lines=True).
# NOTE(review): `error_bad_lines` / `warn_bad_lines` were deprecated in
# pandas 1.3 and removed in 2.0 in favour of `on_bad_lines='warn'`; switch
# to that keyword when running on a newer pandas.
df = pd.read_csv('data.csv', warn_bad_lines=True, error_bad_lines=False)

# Compare the number of fields in every raw row with the expected count and
# report each row that deviates from it.
RECOMMENDED = 3
with open('data.csv') as csv_file:
    reader = csv.reader(csv_file, delimiter=',')
    for row in reader:
        if len(row) != RECOMMENDED:
            # print() with a single argument behaves the same on Python 2
            # and Python 3, unlike the bare `print row` statement.
            print("Length of row is: %r" % len(row))
            print(row)
# Compare the number of fields in every raw row with the number of columns
# pandas ended up with in `df`, and report each row that deviates from it.
lencols = len(df.columns)
print(lencols)
with open('data.csv') as csv_file:
    reader = csv.reader(csv_file, delimiter=',')
    for row in reader:
        if len(row) != lencols:
            # print() with a single argument behaves the same on Python 2
            # and Python 3, unlike the bare `print row` statement.
            print("Length of row is: %r" % len(row))
            print(row)
2. Replace sys.stdout (and sys.stderr)
import pandas as pd
import os
import sys
class RedirectStdStreams(object):
    """Context manager that temporarily replaces sys.stdout and sys.stderr.

    Args:
        stdout: Object to install as ``sys.stdout`` inside the ``with``
            block. When ``None``, the ``sys.stdout`` in effect at
            construction time is used (i.e. no redirection).
        stderr: Object to install as ``sys.stderr`` inside the ``with``
            block. When ``None``, the ``sys.stderr`` in effect at
            construction time is used.

    The streams that were active when the block was entered are restored on
    exit, even if the block raises or flushing a replacement stream fails.
    """

    def __init__(self, stdout=None, stderr=None):
        # Test against None explicitly: a stream object that happens to be
        # falsy (e.g. an empty list-like collector) must still be honoured.
        self._stdout = stdout if stdout is not None else sys.stdout
        self._stderr = stderr if stderr is not None else sys.stderr

    def __enter__(self):
        """Install the replacement streams and return this manager."""
        self.old_stdout, self.old_stderr = sys.stdout, sys.stderr
        # Flush pending output first so it reaches the original streams.
        self.old_stdout.flush()
        self.old_stderr.flush()
        sys.stdout, sys.stderr = self._stdout, self._stderr
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Flush the replacement streams and restore the original ones."""
        try:
            self._stdout.flush()
            self._stderr.flush()
        finally:
            # Always restore, otherwise a failing flush would leave the
            # process-wide streams redirected.
            sys.stdout = self.old_stdout
            sys.stderr = self.old_stderr
if __name__ == '__main__':
    # Despite its name, `devnull` is a real file ('log.txt') that receives
    # everything written to stdout/stderr while the CSV is parsed. Opening
    # it in a `with` block guarantees the file is closed afterwards.
    # replaces sys.stdout, sys.stderr, see http://stackoverflow.com/a/6796752/2901002
    with open('log.txt', 'w') as devnull:
        with RedirectStdStreams(stdout=devnull, stderr=devnull):
            df = pd.read_csv('data.csv', warn_bad_lines=True, error_bad_lines=False)
I can't help with versions older than Python 3, but I've had very good success with the following:
import pandas as pd
from contextlib import redirect_stderr
import io
# Collect everything written to stderr while parsing so it can be reported.
stderr_buffer = io.StringIO()
with redirect_stderr(stderr_buffer):
    df = pd.read_csv(
        new_file_name,
        header=None,
        error_bad_lines=False,
        warn_bad_lines=True,
        dtype=header_types,
    )

# Only log when something was actually captured.
parse_errors = stderr_buffer.getvalue()
if parse_errors:
    logger.warning("Had parsing errors: {}".format(parse_errors))
I searched for this issue a number of times and kept being pointed to this question. I hope it helps someone else later on.