Stitching

Stitch Avro resources in a Crux dataset.

from crux import Crux

# Example: stitch two uploaded Avro files into one destination file,
# download the result, and check the status of the stitch job.

# Create the client and look up the dataset that holds the resources.
conn = Crux()

dataset_object = conn.get_dataset(id="567890")

# Create the file that is later passed to stitch() as the destination.
destination_file = dataset_object.create_file(
    path="/test_destination_file.avro",
    description="test_destination_description",
    tags=["tags1", "tags2"],
)

# Upload the two local Avro files that serve as stitch sources.
file_object = dataset_object.upload_file(
    tags=["test_tag1"],
    description="test_description",
    path="/twitter.avro",
    local_path="/tmp/twitter.avro",
)

file_object2 = dataset_object.upload_file(
    tags=["test_tag1"],
    description="test_description",
    path="/twitter2.avro",
    local_path="/tmp/twitter2.avro",
)


# Variant 1: reference the sources and the destination by resource path.
# stitch() is unpacked into the stitched file object and a job id.
file_obj, job_id = dataset_object.stitch(
    source_resources=[
        "/twitter.avro",
        "/twitter2.avro",
    ],
    destination_resource="/test_destination_file.avro",
    labels={
        "test_label1": "test_value1",
    },
)

# OR

# Variant 2: pass the file objects returned by upload_file()/create_file()
# instead of their paths.
file_obj, job_id = dataset_object.stitch(
    source_resources=[
        file_object,
        file_object2,
    ],
    destination_resource=destination_file,
    labels={
        "test_label1": "test_value1",
    },
)

# download() is used as a truthy success flag here.
if file_obj.download("/tmp/stitched_twitter.avro", media_type="avro/binary"):
    print("Downloaded the file")

# Look up the stitch job by the id returned from stitch() and report its status.
job = dataset_object.get_stitch_job(job_id)
print(job.status)