Java 7 watchservice get file change offset

For what it is worth, I have hacked a little proof of concept which is able to

  • detect added, modified and deleted files in a watched directory,
  • displaying unified diffs for each change (also full diffs when files were added/deleted),
  • keeping track of successive changes by keeping a shadow copy of the source directory,
  • work in a user-defined rhythm (default is 5 seconds) so as not to print too many small diffs in a short period of time, but rather somewhat bigger ones once in a while.

There are several limitations which would be impediments in production environments:

  • In order to not complicate the sample code more than necessary, subdirectories are copied at the beginning when the shadow directory is created (because I have recycled an existing method to create a deep directory copy), but ignored during runtime. Only files right below the watched directory are being monitored so as to avoid recursion.
  • Your requirement not to use external libraries is not met because I really wanted to avoid re-inventing the wheel for unified diff creation.
  • This solution's biggest advantage - it is able to detect changes anywhere in a text file, not only at the end of file like tail -f - is also its biggest disadvantage: Whenever a file changes it must be fully shadow-copied because otherwise the program cannot detect the subsequent change. So I would not recommend this solution for very big files.

How to build:

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>de.scrum-master.tools</groupId>
    <artifactId>SO_WatchServiceChangeLocationInFile</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    </properties>

    <build>
        <plugins>
            <plugin>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.1</version>
                <configuration>
                    <source>1.7</source>
                    <target>1.7</target>
                </configuration>
            </plugin>
        </plugins>
    </build>

    <dependencies>
        <dependency>
            <groupId>com.googlecode.java-diff-utils</groupId>
            <artifactId>diffutils</artifactId>
            <version>1.3.0</version>
        </dependency>
    </dependencies>
</project>

Source code (sorry, a bit lengthy):

package de.scrum_master.app;

import difflib.DiffUtils;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.nio.file.*;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.LinkedList;
import java.util.List;

import static java.nio.file.StandardWatchEventKinds.*;

public class FileChangeWatcher {
    public static final String DEFAULT_WATCH_DIR = "watch-dir";
    public static final String DEFAULT_SHADOW_DIR = "shadow-dir";
    public static final int DEFAULT_WATCH_INTERVAL = 5;

    private Path watchDir;
    private Path shadowDir;
    private int watchInterval;
    private WatchService watchService;

    public FileChangeWatcher(Path watchDir, Path shadowDir, int watchInterval) throws IOException {
        this.watchDir = watchDir;
        this.shadowDir = shadowDir;
        this.watchInterval = watchInterval;
        watchService = FileSystems.getDefault().newWatchService();
    }

    public void run() throws InterruptedException, IOException {
        prepareShadowDir();
        watchDir.register(watchService, ENTRY_CREATE, ENTRY_MODIFY, ENTRY_DELETE);
        while (true) {
            WatchKey watchKey = watchService.take();
            for (WatchEvent<?> event : watchKey.pollEvents()) {
                Path oldFile = shadowDir.resolve((Path) event.context());
                Path newFile = watchDir.resolve((Path) event.context());
                List<String> oldContent;
                List<String> newContent;
                WatchEvent.Kind<?> eventType = event.kind();
                if (!(Files.isDirectory(newFile) || Files.isDirectory(oldFile))) {
                    if (eventType == ENTRY_CREATE) {
                        if (!Files.isDirectory(newFile))
                            Files.createFile(oldFile);
                    } else if (eventType == ENTRY_MODIFY) {
                        Thread.sleep(200);
                        oldContent = fileToLines(oldFile);
                        newContent = fileToLines(newFile);
                        printUnifiedDiff(newFile, oldFile, oldContent, newContent);
                        try {
                            Files.copy(newFile, oldFile, StandardCopyOption.REPLACE_EXISTING);
                        } catch (Exception e) {
                            e.printStackTrace();
                        }
                    } else if (eventType == ENTRY_DELETE) {
                        try {
                            oldContent = fileToLines(oldFile);
                            newContent = new LinkedList<>();
                            printUnifiedDiff(newFile, oldFile, oldContent, newContent);
                            Files.deleteIfExists(oldFile);
                        } catch (Exception e) {
                            e.printStackTrace();
                        }
                    }
                }
            }
            watchKey.reset();
            Thread.sleep(1000 * watchInterval);
        }
    }

    private void prepareShadowDir() throws IOException {
        recursiveDeleteDir(shadowDir);
        Runtime.getRuntime().addShutdownHook(
            new Thread() {
                @Override
                public void run() {
                    try {
                        System.out.println("Cleaning up shadow directory " + shadowDir);
                        recursiveDeleteDir(shadowDir);
                    } catch (IOException e) {
                        e.printStackTrace();
                    }
                }
            }
        );
        recursiveCopyDir(watchDir, shadowDir);
    }

    public static void recursiveDeleteDir(Path directory) throws IOException {
        if (!directory.toFile().exists())
            return;
        Files.walkFileTree(directory, new SimpleFileVisitor<Path>() {
            @Override
            public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
                Files.delete(file);
                return FileVisitResult.CONTINUE;
            }

            @Override
            public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOException {
                Files.delete(dir);
                return FileVisitResult.CONTINUE;
            }
        });
    }

    public static void recursiveCopyDir(final Path sourceDir, final Path targetDir) throws IOException {
        Files.walkFileTree(sourceDir, new SimpleFileVisitor<Path>() {
            @Override
            public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
                Files.copy(file, Paths.get(file.toString().replace(sourceDir.toString(), targetDir.toString())));
                return FileVisitResult.CONTINUE;
            }

            @Override
            public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs) throws IOException {
                Files.createDirectories(Paths.get(dir.toString().replace(sourceDir.toString(), targetDir.toString())));
                return FileVisitResult.CONTINUE;
            }
        });
    }

    private static List<String> fileToLines(Path path) throws IOException {
        List<String> lines = new LinkedList<>();
        String line;
        try (BufferedReader reader = new BufferedReader(new FileReader(path.toFile()))) {
            while ((line = reader.readLine()) != null)
                lines.add(line);
        }
        catch (Exception e) {}
        return lines;
    }

    private static void printUnifiedDiff(Path oldPath, Path newPath, List<String> oldContent, List<String> newContent) {
        List<String> diffLines = DiffUtils.generateUnifiedDiff(
            newPath.toString(),
            oldPath.toString(),
            oldContent,
            DiffUtils.diff(oldContent, newContent),
            3
        );
        System.out.println();
        for (String diffLine : diffLines)
            System.out.println(diffLine);
    }

    public static void main(String[] args) throws IOException, InterruptedException {
        String watchDirName = args.length > 0 ? args[0] : DEFAULT_WATCH_DIR;
        String shadowDirName = args.length > 1 ? args[1] : DEFAULT_SHADOW_DIR;
        int watchInterval = args.length > 2 ? Integer.getInteger(args[2]) : DEFAULT_WATCH_INTERVAL;
        new FileChangeWatcher(Paths.get(watchDirName), Paths.get(shadowDirName), watchInterval).run();
    }
}

I recommend to use the default settings (e.g. use a source directory named "watch-dir") and play around with it for a while, watching the console output as you create and edit some text files in an editor. It helps understand the software's inner mechanics. If something goes wrong, e.g. within one 5 second rhythm a file is created but also quickly deleted again, there is nothing to copy or diff, so the program will just print a stack trace to System.err.


Okay, here is another answer as a variation of my previous one for changes at any file position (diff). Now the somewhat simpler case is files only being appended (tail).

How to build:

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>de.scrum-master.tools</groupId>
    <artifactId>SO_WatchServiceChangeLocationInFile</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    </properties>

    <build>
        <plugins>
            <plugin>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.1</version>
                <configuration>
                    <source>1.7</source>
                    <target>1.7</target>
                </configuration>
            </plugin>
        </plugins>
    </build>

    <dependencies>
        <dependency>
            <groupId>commons-io</groupId>
            <artifactId>commons-io</artifactId>
            <!-- Use snapshot because of the UTF-8 problem in https://issues.apache.org/jira/browse/IO-354 -->
            <version>2.5-SNAPSHOT</version>
        </dependency>
    </dependencies>

    <repositories>
        <repository>
            <id>apache.snapshots</id>
            <url>http://repository.apache.org/snapshots/</url>
        </repository>
    </repositories>
</project>

As you can see, we use Apache Commons IO here. (Why a snapshot version? Follow the link in the XML comment if you are interested.)

Source code:

package de.scrum_master.app;

import org.apache.commons.io.input.Tailer;
import org.apache.commons.io.input.TailerListenerAdapter;

import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.file.*;

import static java.nio.file.StandardWatchEventKinds.ENTRY_CREATE;

public class FileTailWatcher {
    public static final String DEFAULT_WATCH_DIR = "watch-dir";
    public static final int DEFAULT_WATCH_INTERVAL = 5;

    private Path watchDir;
    private int watchInterval;
    private WatchService watchService;

    public FileTailWatcher(Path watchDir, int watchInterval) throws IOException {
        if (!Files.isDirectory(watchDir))
            throw new IllegalArgumentException("Path '" + watchDir + "' is not a directory");
        this.watchDir = watchDir;
        this.watchInterval = watchInterval;
        watchService = FileSystems.getDefault().newWatchService();
    }

    public static class MyTailerListener extends TailerListenerAdapter {
        public void handle(String line) {
            System.out.println(line);
        }
    }

    public void run() throws InterruptedException, IOException {
        try (DirectoryStream<Path> dirEntries = Files.newDirectoryStream(watchDir)) {
            for (Path file : dirEntries)
                createTailer(file);
        }
        watchDir.register(watchService, ENTRY_CREATE);
        while (true) {
            WatchKey watchKey = watchService.take();
            for (WatchEvent<?> event : watchKey.pollEvents())
                createTailer(watchDir.resolve((Path) event.context()));
            watchKey.reset();
            Thread.sleep(1000 * watchInterval);
        }
    }

    private Tailer createTailer(Path path) {
        if (Files.isDirectory(path))
            return null;
        System.out.println("Creating tailer: " + path);
        return Tailer.create(
            path.toFile(),             // File to be monitored
            Charset.defaultCharset(),  // Character set (available since Commons IO 2.5)
            new MyTailerListener(),    // What should happen for new tail events?
            1000,                      // Delay between checks in ms
            true,                      // Tail from end of file, not from beginning
            true,                      // Close & reopen files in between reads,
                                       // otherwise file is locked on Windows and cannot be deleted
            4096                       // Read buffer size
        );
    }

    public static void main(String[] args) throws IOException, InterruptedException {
        String watchDirName = args.length > 0 ? args[0] : DEFAULT_WATCH_DIR;
        int watchInterval = args.length > 2 ? Integer.getInteger(args[2]) : DEFAULT_WATCH_INTERVAL;
        new FileTailWatcher(Paths.get(watchDirName), watchInterval).run();
    }
}

Now try appending to existing files and/or creating new ones. Everything will be printed to standard output. In a production environment you would maybe display multiple windows or tabs, one for each log file. Whatever...

@Simon: I hope this one suits your situation better than the more general case and is worth a bounty. :-)

Tags:

Java

Nio