Most efficient way to search the last X lines of a file?
# Tail
from __future__ import with_statement
find_str = "FIREFOX" # String to find
fname = "g:/autoIt/ActiveWin.log_2" # File to check
with open(fname, "r") as f:
f.seek (0, 2) # Seek @ EOF
fsize = f.tell() # Get Size
f.seek (max (fsize-1024, 0), 0) # Set pos @ last n chars
lines = f.readlines() # Read to end
lines = lines[-10:] # Get last 10 lines
# This returns True if any line is exactly find_str + "\n"
print find_str + "\n" in lines
# If you're searching for a substring
for line in lines:
if find_str in line:
print True
break
Here's an answer like MizardX's, but without its apparent problem of taking quadratic time in the worst case from rescanning the working string repeatedly for newlines as chunks are added.
Compared to the Active State solution (which also seems to be quadratic), this doesn't blow up given an empty file and does one seek per block read instead of two.
Compared to spawning 'tail', this is self-contained. (But 'tail' is best if you have it.)
Compared to grabbing a few kB off the end and hoping it's enough, this works for any line length.
import os
def reversed_lines(file):
"Generate the lines of file in reverse order."
part = ''
for block in reversed_blocks(file):
for c in reversed(block):
if c == '\n' and part:
yield part[::-1]
part = ''
part += c
if part: yield part[::-1]
def reversed_blocks(file, blocksize=4096):
"Generate blocks of file's contents in reverse order."
file.seek(0, os.SEEK_END)
here = file.tell()
while 0 < here:
delta = min(blocksize, here)
here -= delta
file.seek(here, os.SEEK_SET)
yield file.read(delta)
To use it as requested:
from itertools import islice
def check_last_10_lines(file, key):
for line in islice(reversed_lines(file), 10):
if line.rstrip('\n') == key:
print 'FOUND'
break
Edit: changed map() to itertools.imap() in head(). Edit 2: simplified reversed_blocks(). Edit 3: avoid rescanning tail for newlines. Edit 4: rewrote reversed_lines() because str.splitlines() ignores a final '\n', as BrianB noticed (thanks).
Note that in very old Python versions the string concatenation in a loop here will take quadratic time. CPython from at least the last few years avoids this problem automatically.