Python data sink code example

Example: Python data sink

# The four classes below sketch a simple DataPipeline. The method bodies are left empty and would need to be filled in by the user.

class WordDoc:
    """Placeholder type standing in for a Word document in this example."""

class PDF:
    """Placeholder type standing in for a PDF document in this example."""

class SQLDatabase(DataSource, DataSink):
    """Example SQL-backed store that is both a DataSource and a DataSink for WordDocs."""

    # NOTE(review): the stubs below take no `self`; confirm against the DataPipeline
    # API whether registered handlers are expected to be instance methods.

    # Registering with `get` tells the DataPipeline that this SQL database can provide a WordDoc.
    @get.register(WordDoc)
    def get_word_doc(query: Dict[str, Any]) -> WordDoc:
        """Look up the WordDoc identified by the query's `filename` in the SQL database and return it."""

    # Registering with `put` tells the DataPipeline that this SQL database can store a WordDoc.
    @put.register(WordDoc)
    def put_word_doc(doc: WordDoc, query: Dict[str, Any]):
        """Store `doc` in the SQL database, using the query as its identifier."""

class DocumentTransformer(Transformer):
    """Example Transformer that declares a WordDoc-to-PDF conversion."""

    # NOTE(review): the stub below takes no `self`; confirm against the DataPipeline
    # API whether registered handlers are expected to be instance methods.

    # Registering the (WordDoc, PDF) pair tells the DataPipeline that we know how
    # to convert a WordDoc to a PDF.
    @transform.register(WordDoc, PDF)
    def Word_to_PDF(doc: WordDoc) -> PDF:
        """Convert the given WordDoc to a PDF and return the PDF."""


# With the classes above in place, the single line below is enough to request a PDF.
# The WordDoc with the filename `find_me` will be pulled from the SQL database, then converted to a PDF and returned to the user.
# NOTE(review): `pipeline` is not defined in this snippet — presumably a DataPipeline built from the classes above; confirm.
my_pdf = pipeline.get(PDF, query={"filename": "find_me"})

# Note also that, because we implemented a `put(WordDoc)` method in the SQLDatabase, it will also store WordDocs that pass through the SQL database via the pipeline but are not already in the database.

Tags:

Misc Example