Writing to a particular cell using csv module in python
I agree, this is annoying. I wound up subclassing csv.DictReader, which allows cell-based lookup, in-place editing, and dumping back to a file. I have the code posted on ActiveState: "In place csv lookup, manipulation and export"
import csv, collections, copy
"""
# CSV TEST FILE 'test.csv'
TBLID,DATETIME,VAL
C1,01:01:2011:00:01:23,5
C2,01:01:2012:00:01:23,8
C3,01:01:2013:00:01:23,4
C4,01:01:2011:01:01:23,9
C5,01:01:2011:02:01:23,1
C6,01:01:2011:03:01:23,5
C7,01:01:2011:00:01:23,6
C8,01:01:2011:00:21:23,8
C9,01:01:2011:12:01:23,1
#usage (saving this code as CustomDictReader.py)
>>> import CustomDictReader
>>> import pprint
>>> test = CustomDictReader.CSVRW()
>>> success, thedict = test.createCsvDict('TBLID',',',None,'test.csv')
>>> pprint.pprint(dict(thedict))
{'C1': OrderedDict([('TBLID', 'C1'), ('DATETIME', '01:01:2011:00:01:23'), ('VAL', '5')]),
'C2': OrderedDict([('TBLID', 'C2'), ('DATETIME', '01:01:2012:00:01:23'), ('VAL', '8')]),
'C3': OrderedDict([('TBLID', 'C3'), ('DATETIME', '01:01:2013:00:01:23'), ('VAL', '4')]),
'C4': OrderedDict([('TBLID', 'C4'), ('DATETIME', '01:01:2011:01:01:23'), ('VAL', '9')]),
'C5': OrderedDict([('TBLID', 'C5'), ('DATETIME', '01:01:2011:02:01:23'), ('VAL', '1')]),
'C6': OrderedDict([('TBLID', 'C6'), ('DATETIME', '01:01:2011:03:01:23'), ('VAL', '5')]),
'C7': OrderedDict([('TBLID', 'C7'), ('DATETIME', '01:01:2011:00:01:23'), ('VAL', '6')]),
'C8': OrderedDict([('TBLID', 'C8'), ('DATETIME', '01:01:2011:00:21:23'), ('VAL', '8')]),
'C9': OrderedDict([('TBLID', 'C9'), ('DATETIME', '01:01:2011:12:01:23'), ('VAL', '1')])}
>>> thedict.keys()
['C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9']
>>> thedict['C2']['VAL'] = "BOB"
>>> pprint.pprint(dict(thedict))
{'C1': OrderedDict([('TBLID', 'C1'), ('DATETIME', '01:01:2011:00:01:23'), ('VAL', '5')]),
'C2': OrderedDict([('TBLID', 'C2'), ('DATETIME', '01:01:2012:00:01:23'), ('VAL', 'BOB')]),
'C3': OrderedDict([('TBLID', 'C3'), ('DATETIME', '01:01:2013:00:01:23'), ('VAL', '4')]),
'C4': OrderedDict([('TBLID', 'C4'), ('DATETIME', '01:01:2011:01:01:23'), ('VAL', '9')]),
'C5': OrderedDict([('TBLID', 'C5'), ('DATETIME', '01:01:2011:02:01:23'), ('VAL', '1')]),
'C6': OrderedDict([('TBLID', 'C6'), ('DATETIME', '01:01:2011:03:01:23'), ('VAL', '5')]),
'C7': OrderedDict([('TBLID', 'C7'), ('DATETIME', '01:01:2011:00:01:23'), ('VAL', '6')]),
'C8': OrderedDict([('TBLID', 'C8'), ('DATETIME', '01:01:2011:00:21:23'), ('VAL', '8')]),
'C9': OrderedDict([('TBLID', 'C9'), ('DATETIME', '01:01:2011:12:01:23'), ('VAL', '1')])}
>>> test.updateCsvDict(thedict)
>>> test.createCsv('wb')
"""
class CustomDictReader(csv.DictReader):
    """
    csv.DictReader variant that returns every row as a
    collections.OrderedDict, so the column order is preserved
    when the rows are written back into a file.
    Completely empty rows are skipped.
    """

    def __init__(self, f, fieldnames=None, restkey=None, restval=None,
                 dialect="excel", *args, **kwds):
        """
        @brief set up the reader
        @param f - file object (or any iterable of lines) to read from
        @param fieldnames - column names; None means "use the first row"
        @param restkey - key that collects surplus values of overlong rows
        @param restval - value given to columns missing from short rows
        @param dialect - csv dialect name or object
        @param *args, **kwds - passed through to csv.reader (e.g. delimiter)
        """
        # Hand the caller's values to the base class; passing hard-coded
        # defaults here would silently discard every argument.
        csv.DictReader.__init__(self, f, fieldnames, restkey, restval,
                                dialect, *args, **kwds)

    def __next__(self):
        """
        @brief read the next non-empty row
        @return OrderedDict mapping field name -> cell value
        @raise StopIteration when the input is exhausted
        """
        if self.line_num == 0:
            # Used only for its side effect (loads the header row when
            # no explicit fieldnames were supplied).
            self.fieldnames
        row = next(self.reader)
        self.line_num = self.reader.line_num
        # unlike the basic reader, we prefer not to return blanks,
        # because we will typically wind up with a dict full of None
        # values
        while row == []:
            row = next(self.reader)
        d = collections.OrderedDict(zip(self.fieldnames, row))
        lf = len(self.fieldnames)
        lr = len(row)
        if lf < lr:
            # overlong row: keep the surplus cells under restkey
            d[self.restkey] = row[lf:]
        elif lf > lr:
            # short row: pad the missing columns with restval
            for key in self.fieldnames[lr:]:
                d[key] = self.restval
        return d

    # Python 2 drives iteration through next(), Python 3 through __next__().
    next = __next__


class CSVRW(object):
    """
    Loads a csv file into an ordered dict of ordered row dicts (indexed by
    the value of one chosen column), lets the caller edit it, and writes
    it back out.
    """

    def __init__(self):
        self.file_name = ""
        self.csv_delim = ""
        self.csv_dict = collections.OrderedDict()

    @staticmethod
    def _openCsvFile(name, mode):
        """
        @brief open a file in the way the running interpreter's csv module needs
        @param name - file name
        @param mode - 'r' || 'rb' || 'w' || 'wb'
        @return the open file object
        """
        import sys
        if sys.version_info[0] < 3:
            return open(name, mode)
        # Python 3's csv module works on text files opened with newline='',
        # so the Python 2 style binary flag is dropped.
        return open(name, mode.replace('b', ''), newline='')

    def setCsvFileName(self, name):
        """
        @brief stores csv file name
        @param name- the file name
        """
        self.file_name = name

    def getCsvFileName(self):
        """
        @brief getter
        @return returns the file name
        """
        return self.file_name

    def getCsvDict(self):
        """
        @brief getter
        @return returns a deep copy of the csv as a dictionary
        """
        return copy.deepcopy(self.csv_dict)

    def clearCsvDict(self):
        """
        @brief resets the dictionary
        """
        self.csv_dict = collections.OrderedDict()

    def updateCsvDict(self, newCsvDict):
        """
        creates a deep copy of the dict passed in and
        sets it to the member one
        """
        self.csv_dict = copy.deepcopy(newCsvDict)

    def createCsvDict(self, dictKey, delim, handle=None, name=None,
                      readMode='rb', **kwargs):
        """
        @brief create a dict from a csv file where:
        the top level keys are the first line in the dict, overrideable w/ **kwargs
        each row is a dict
        each row can be accessed by the value stored in the column associated w/ dictKey
        that is to say, if you want to index into your csv file based on the contents of the
        third column, pass the name of that col in as 'dictKey'
        Rows are added to whatever is already stored; call clearCsvDict()
        first to start from scratch.
        @param dictKey - row key whose value will act as an index
        @param delim - csv file deliminator (a single character)
        @param handle - file handle (leave as None if you wish to pass in a file name)
        @param name - file name (leave as None if you wish to pass in a file handle)
        @param readMode - 'r' || 'rb'
        @param **kwargs - additional args allowed by the csv module
        @return (True, deep copy of the dict) on success,
                (False, 'Error opening file') if the file cannot be opened or read
        @raise KeyError if a row has no column named dictKey
        """
        self.csv_delim = delim
        readerArgs = dict(kwargs)
        if delim:
            # The delimiter has to reach csv.reader as a keyword; passed
            # positionally it would land in the fieldnames slot.
            readerArgs.setdefault('delimiter', delim)
        opened = None
        try:
            if hasattr(handle, 'read'):
                # Caller-owned handle: remember its name if it has one,
                # and leave closing it to the caller.
                self.setCsvFileName(getattr(handle, 'name', self.file_name))
                source = handle
            else:
                if name is None:
                    name = self.getCsvFileName()
                else:
                    self.setCsvFileName(name)
                opened = source = self._openCsvFile(name, readMode)
            reader = CustomDictReader(source, **readerArgs)
            for row in reader:
                self.csv_dict[row[dictKey]] = row
            return True, self.getCsvDict()
        except IOError:
            return False, 'Error opening file'
        finally:
            # Only close what this method opened itself.
            if opened is not None:
                opened.close()

    def _writeRows(self, fout, delim):
        """
        @brief write the header line and every stored row to an open file
        @param fout - writable file object
        @param delim - csv deliminator (a single character)
        """
        rows = list(self.csv_dict.values())
        if not rows:
            return
        # csv.writer quotes cells that contain the delimiter and accepts
        # non-string values; '\n' keeps the line ending the plain
        # string join used to produce.
        writer = csv.writer(fout, delimiter=delim, lineterminator='\n')
        # the header is taken from the keys of the first row
        writer.writerow(list(rows[0].keys()))
        for row in rows:
            writer.writerow(list(row.values()))

    def createCsv(self, writeMode, outFileName=None, delim=None):
        """
        @brief create a csv from self.csv_dict
        @param writeMode - 'w' || 'wb' (ignored when a file handle is passed)
        @param outFileName - file name || file handle; None means the stored file name
        @param delim - csv deliminator (a single character); None means the
                       one stored by createCsvDict, ',' if none is stored
        @return none
        """
        if outFileName is None:
            outFileName = self.file_name
        if delim is None:
            delim = self.csv_delim
        if not delim:
            # nothing usable stored yet - fall back to the csv default
            delim = ','
        if hasattr(outFileName, 'write'):
            # caller-owned handle: write to it and leave it open
            self._writeRows(outFileName, delim)
        else:
            with self._openCsvFile(outFileName, writeMode) as fout:
                self._writeRows(fout, delim)
Suppose you have a csv file called mylist.csv with the following lines:
a, b, c, d
e, f, g, h
i, j, k, l
If you want to change 'h' to 'X', you can use the following code (the csv module must be imported first):
# Load every row into memory so that a single cell can be addressed by
# its (row, column) index. The with-blocks close the file even if
# reading or writing fails part-way through.
with open('mylist.csv', 'r', newline='') as f:
    reader = csv.reader(f)
    mylist = list(reader)

# Row 1, column 3 (both zero-based) is the 'h' cell of the sample file.
mylist[1][3] = 'X'

# Rewrite the whole file from the edited list; newline='' lets the csv
# module control the line endings itself.
with open('mylist.csv', 'w', newline='') as my_new_list:
    csv_writer = csv.writer(my_new_list)
    csv_writer.writerows(mylist)
If you want to modify a particular column in every row, just add a for loop that iterates over the rows.
The csv module provides facilities to read and write csv files, but it does not allow the modification of specific cells in place.
Even the csvwriter.writerow(row)
method you highlight in your question does not allow you to identify and overwrite a specific row. Rather, it writes the row
parameter to the writer's file object; in effect, it simply appends a row to the csv file associated with the writer.
Do not be dissuaded from using the csv module though, it is simple to use and with the primitives provided you could implement the higher level functionality you are looking for relatively easily.
For example take a look at the following csv file:
1,2,3,four,5
1,2,3,four,5
1,2,3,four,5
The word four
is in column 3 (it is the fourth column, but a row is just a list, so the indexing is zero-based). It can easily be updated to contain the digit 4
with the following program:
import csv
# Stream the rows from the input file to the output file, replacing one
# cell on the way. The with-block closes both files even when a row is
# too short and row[3] raises IndexError.
# NOTE: "rb"/"wb" are the modes the Python 2 csv module expects; on
# Python 3 open the files with "r"/"w" and newline="" instead.
with open("d:/in.csv", "rb") as in_file, open("d:/out.csv", "wb") as out_file:
    reader = csv.reader(in_file)
    writer = csv.writer(out_file)
    for row in reader:
        # Column 3 (zero-based) holds the word "four"; overwrite it.
        row[3] = 4
        writer.writerow(row)
Resulting in the output:
1,2,3,4,5
1,2,3,4,5
1,2,3,4,5
Granted, creating a generic function that allows specific rows and columns to be identified and updated is a little more work, but not much more, as manipulating a csv file in Python is just manipulating a sequence of lists.