Insert row into Excel spreadsheet using openpyxl in Python
== Updated to a fully functional version, based on feedback here: groups.google.com/forum/#!topic/openpyxl-users/wHGecdQg3Iw. ==
As the others have pointed out, openpyxl
does not provide this functionality, but I have extended the Worksheet
class as follows to implement inserting rows. Hope this proves useful to others.
def insert_rows(self, row_idx, cnt, above=False, copy_style=True, fill_formulae=True):
    """Insert new (empty) rows into the worksheet at the specified row index.

    Cells below the insertion point are moved down by ``cnt`` rows. Row
    references in formulae, in formula ``ref`` attributes and in merged cell
    ranges are rewritten to follow the shift.

    :param row_idx: Row index specifying where to insert new rows.
    :param cnt: Number of rows to insert. ``0`` is a no-op.
    :param above: Set True to insert rows above specified row index.
    :param copy_style: Set True if new rows should copy style of immediately above row.
    :param fill_formulae: Set True if new rows should take on the formula of the
        immediately above row, with its references to that row re-pointed at
        the new row.
    :raises ValueError: If ``cnt`` is negative.

    Usage:

    * insert_rows(2, 10, above=True, copy_style=False)
    """
    if cnt < 0:
        raise ValueError("cnt must be non-negative, got %r" % (cnt,))
    if cnt == 0:
        # With cnt == 0 the row-dimension shift below would assign and then
        # delete the very same key, wiping dimensions instead of moving them.
        return

    # Matches A1-style references, keeping any "$" absolute markers.
    CELL_RE = re.compile(r"(?P<col>\$?[A-Z]+)(?P<row>\$?\d+)")

    # Internally, rows are always inserted *after* row_idx.
    row_idx = row_idx - 1 if above else row_idx

    def replace(m):
        # Shift the row part of a reference down by cnt when it lies below the
        # insertion point. row_idx is read at call time, not at definition time.
        row = m.group('row')
        prefix = "$" if "$" in row else ""
        row = int(row.replace("$", ""))
        row += cnt if row > row_idx else 0
        return m.group('col') + prefix + str(row)

    # First, we shift all cells down cnt rows...
    old_cells = set()
    old_fas = set()
    new_cells = {}
    new_fas = {}
    for c in self._cells.values():
        old_coor = c.coordinate

        # Shift all references to anything below row_idx
        if c.data_type == Cell.TYPE_FORMULA:
            c.value = CELL_RE.sub(replace, c.value)
            # Keep the formula's 'ref' range in step with the new row indices
            if old_coor in self.formula_attributes and 'ref' in self.formula_attributes[old_coor]:
                self.formula_attributes[old_coor]['ref'] = CELL_RE.sub(
                    replace,
                    self.formula_attributes[old_coor]['ref']
                )

        # Record the move; the dictionaries are only rewritten after the loop
        # so that self._cells is not modified while it is being iterated.
        if c.row > row_idx:
            old_coor = c.coordinate
            old_cells.add((c.row, c.col_idx))
            c.row += cnt
            new_cells[(c.row, c.col_idx)] = c
            if old_coor in self.formula_attributes:
                old_fas.add(old_coor)
                new_fas[c.coordinate] = self.formula_attributes[old_coor].copy()

    for coor in old_cells:
        del self._cells[coor]
    self._cells.update(new_cells)

    for fa in old_fas:
        del self.formula_attributes[fa]
    self.formula_attributes.update(new_fas)

    # Next, we need to shift all the Row Dimensions below our new rows down by cnt...
    # NOTE(review): the upper bound is derived from len(self.row_dimensions);
    # presumably that equals the last populated row - confirm.
    for row in range(len(self.row_dimensions) - 1 + cnt, row_idx + cnt, -1):
        new_rd = copy.copy(self.row_dimensions[row - cnt])
        new_rd.index = row
        self.row_dimensions[row] = new_rd
        del self.row_dimensions[row - cnt]

    # Now, create our new rows, with all the pretty cells
    row_idx += 1
    for row in range(row_idx, row_idx + cnt):
        # Create a Row Dimension for our new row
        new_rd = copy.copy(self.row_dimensions[row - 1])
        new_rd.index = row
        self.row_dimensions[row] = new_rd

        # max_column is the 1-based index of the last column, so the range
        # must run to max_column + 1 to include that column.
        for col_idx in range(1, self.max_column + 1):
            col = get_column_letter(col_idx)
            cell = self.cell('%s%d' % (col, row))
            cell.value = None
            source = self.cell('%s%d' % (col, row - 1))
            if copy_style:
                cell.number_format = source.number_format
                cell.font = source.font.copy()
                cell.alignment = source.alignment.copy()
                cell.border = source.border.copy()
                cell.fill = source.fill.copy()
            if fill_formulae and source.data_type == Cell.TYPE_FORMULA:
                s_coor = source.coordinate
                if s_coor in self.formula_attributes and 'ref' not in self.formula_attributes[s_coor]:
                    self.formula_attributes[cell.coordinate] = self.formula_attributes[s_coor].copy()
                # Re-point references to the source row at the new row. The
                # (?!\d) guard stops row 1 from matching inside A10, A11, ...
                cell.value = re.sub(
                    r"(\$?[A-Z]{1,3}\$?)%d(?!\d)" % (row - 1),
                    lambda m: m.group(1) + str(row),
                    source.value
                )
                cell.data_type = Cell.TYPE_FORMULA

    # Check for Merged Cell Ranges that need to be expanded to contain new cells
    # NOTE(review): replace() now sees the incremented row_idx, so a range
    # boundary on the first shifted row is left in place - confirm intended.
    for cr_idx, cr in enumerate(self.merged_cell_ranges):
        self.merged_cell_ranges[cr_idx] = CELL_RE.sub(replace, cr)


# Attach the function to Worksheet so it can be called as ws.insert_rows(...).
Worksheet.insert_rows = insert_rows
Answering this with the code that I'm now using to achieve the desired result. Note that I am manually inserting the row at position 1, but that should be easy enough to adjust for specific needs. You could also easily tweak this to insert more than one row, and simply populate the rest of the data starting at the relevant position.
Also, note that due to downstream dependencies, we are manually specifying data from 'Sheet1', and the data is getting copied to a new sheet which is inserted at the beginning of the workbook, whilst renaming the original worksheet to 'Sheet1.5'.
EDIT: I later also added a change to the format_code, because the default copy operation here removes all formatting: new_cell.style.number_format.format_code = 'mm/dd/yyyy'. I couldn't find any documentation saying that this was settable; it was more a case of trial and error!
Lastly, don't forget this example is saving over the original. You can change the save path where applicable to avoid this.
import openpyxl
workbook = openpyxl.load_workbook(file)

# Rename the existing 'Sheet1' so that a fresh sheet can take over its name.
source_ws = workbook.get_sheet_by_name('Sheet1')
source_ws.title = 'Sheet1.5'
row_count = source_ws.get_highest_row()
col_count = source_ws.get_highest_column()

# The replacement sheet goes to the front of the workbook.
workbook.create_sheet(0, 'Sheet1')
target_ws = workbook.get_sheet_by_name('Sheet1')

columns = range(0, col_count)

# Header row is copied across unchanged.
for col in columns:
    header_value = source_ws.cell(row=0, column=col).value
    target_ws.cell(row=0, column=col).value = header_value

# The inserted row: its first two cells are filled in by hand.
for col in (0, 1):
    target_ws.cell(row=1, column=col).value = 'DUMMY'

# Every remaining source row lands one row lower in the new sheet.
for src_row in range(1, row_count):
    dst_row = src_row + 1
    for col in columns:
        moved_value = source_ws.cell(row=src_row, column=col).value
        target_ws.cell(row=dst_row, column=col).value = moved_value

# Saves over the file that was loaded.
workbook.save(file)
Adding an answer applicable to more recent releases (v2.5+) of openpyxl:
There are now built-in insert_rows() and insert_cols() methods.
insert_rows(idx, amount=1)
Insert row or rows before row==idx