Class OpenLineage

java.lang.Object
io.openlineage.client.OpenLineage

public final class OpenLineage extends Object
Usage:

   ZonedDateTime now = ZonedDateTime.now();
   URI producer = URI.create("http://my.producer/uri");
   OpenLineage ol = new OpenLineage(producer);
   UUID runId = UUID.randomUUID();
   RunFacets runFacets =
     ol.newRunFacetsBuilder().nominalTime(ol.newNominalTimeRunFacet(now, now)).build();
   Run run = ol.newRun(runId, runFacets);
   String name = "jobName";
   String namespace = "namespace";
   JobFacets jobFacets = ol.newJobFacetsBuilder().build();
   Job job = ol.newJob(namespace, name, jobFacets);
   List<InputDataset> inputs = Arrays.asList(ol.newInputDataset("ins", "input", null, null));
   List<OutputDataset> outputs = Arrays.asList(ol.newOutputDataset("ons", "output", null, null));
   RunEvent runStateUpdate =
     ol.newRunEvent(now, OpenLineage.RunEvent.EventType.START, run, job, inputs, outputs);
 
  • Constructor Details

    • OpenLineage

      public OpenLineage(URI producer)
      Starting point to create OpenLineage objects. Use the OpenLineage instance to create events and facets.
      Parameters:
      producer - the identifier of the library using the client to generate OpenLineage events
  • Method Details

    • newRunEvent

      Factory method for RunEvent
      Parameters:
      eventTime - the time the event occurred at
      eventType - the current transition of the run state. Each run is required to issue one START event and one of the [ COMPLETE, ABORT, FAIL ] events. Additional events with the OTHER eventType can be added to the same run, for example to send additional metadata after the run is complete.
      run - the run
      job - the job
      inputs - The set of **input** datasets.
      outputs - The set of **output** datasets.
      Returns:
      RunEvent
    • newRunEventBuilder

      public OpenLineage.RunEventBuilder newRunEventBuilder()
      Creates a builder for RunEvent
      Returns:
      a new builder for RunEvent
    • newSchemaDatasetFacetFields

      public OpenLineage.SchemaDatasetFacetFields newSchemaDatasetFacetFields(String name, String type, String description, List<OpenLineage.SchemaDatasetFacetFields> fields)
      Factory method for SchemaDatasetFacetFields
      Parameters:
      name - The name of the field.
      type - The type of the field.
      description - The description of the field.
      fields - Nested struct fields.
      Returns:
      SchemaDatasetFacetFields
    • newSchemaDatasetFacetFieldsBuilder

      public OpenLineage.SchemaDatasetFacetFieldsBuilder newSchemaDatasetFacetFieldsBuilder()
      Creates a builder for SchemaDatasetFacetFields
      Returns:
      a new builder for SchemaDatasetFacetFields
    • newParentRunFacet

      Factory method for ParentRunFacet
      Parameters:
      run - the run
      job - the job
      Returns:
      ParentRunFacet
    • newParentRunFacetBuilder

      public OpenLineage.ParentRunFacetBuilder newParentRunFacetBuilder()
      Creates a builder for ParentRunFacet
      Returns:
      a new builder for ParentRunFacet
    • newOutputStatisticsOutputDatasetFacet

      public OpenLineage.OutputStatisticsOutputDatasetFacet newOutputStatisticsOutputDatasetFacet(Long rowCount, Long size, Long fileCount)
      Factory method for OutputStatisticsOutputDatasetFacet
      Parameters:
      rowCount - The number of rows written to the dataset
      size - The size in bytes written to the dataset
      fileCount - The number of files written to the dataset
      Returns:
      OutputStatisticsOutputDatasetFacet
    • newOutputStatisticsOutputDatasetFacetBuilder

      public OpenLineage.OutputStatisticsOutputDatasetFacetBuilder newOutputStatisticsOutputDatasetFacetBuilder()
      Creates a builder for OutputStatisticsOutputDatasetFacet
      Returns:
      a new builder for OutputStatisticsOutputDatasetFacet
    • newExtractionErrorRunFacetErrors

      public OpenLineage.ExtractionErrorRunFacetErrors newExtractionErrorRunFacetErrors(String errorMessage, String stackTrace, String task, Long taskNumber)
      Factory method for ExtractionErrorRunFacetErrors
      Parameters:
      errorMessage - Text representation of extraction error message.
      stackTrace - Stack trace of extraction error message
      task - Text representation of the task that failed. This can be, for example, a SQL statement that the parser could not interpret.
      taskNumber - Order of task (counted from 0).
      Returns:
      ExtractionErrorRunFacetErrors
    • newExtractionErrorRunFacetErrorsBuilder

      public OpenLineage.ExtractionErrorRunFacetErrorsBuilder newExtractionErrorRunFacetErrorsBuilder()
      Creates a builder for ExtractionErrorRunFacetErrors
      Returns:
      a new builder for ExtractionErrorRunFacetErrors
    • newJobTypeJobFacet

      public OpenLineage.JobTypeJobFacet newJobTypeJobFacet(String processingType, String integration, String jobType)
      Factory method for JobTypeJobFacet
      Parameters:
      processingType - Job processing type like: BATCH or STREAMING
      integration - OpenLineage integration type of this job: for example SPARK|DBT|AIRFLOW|FLINK
      jobType - Run type, for example: QUERY|COMMAND|DAG|TASK|JOB|MODEL. This is an integration-specific field.
      Returns:
      JobTypeJobFacet
    • newJobTypeJobFacetBuilder

      public OpenLineage.JobTypeJobFacetBuilder newJobTypeJobFacetBuilder()
      Creates a builder for JobTypeJobFacet
      Returns:
      a new builder for JobTypeJobFacet
    • newRun

      public OpenLineage.Run newRun(UUID runId, OpenLineage.RunFacets facets)
      Factory method for Run
      Parameters:
      runId - The globally unique ID of the run associated with the job.
      facets - The run facets.
      Returns:
      Run
    • newRunBuilder

      public OpenLineage.RunBuilder newRunBuilder()
      Creates a builder for Run
      Returns:
      a new builder for Run
    • newDataQualityMetricsInputDatasetFacetColumnMetricsAdditionalQuantiles

      public OpenLineage.DataQualityMetricsInputDatasetFacetColumnMetricsAdditionalQuantiles newDataQualityMetricsInputDatasetFacetColumnMetricsAdditionalQuantiles()
      Factory method for DataQualityMetricsInputDatasetFacetColumnMetricsAdditionalQuantiles
      Returns:
      DataQualityMetricsInputDatasetFacetColumnMetricsAdditionalQuantiles
    • newDataQualityMetricsInputDatasetFacetColumnMetricsAdditionalQuantilesBuilder

      public OpenLineage.DataQualityMetricsInputDatasetFacetColumnMetricsAdditionalQuantilesBuilder newDataQualityMetricsInputDatasetFacetColumnMetricsAdditionalQuantilesBuilder()
      Creates a builder for DataQualityMetricsInputDatasetFacetColumnMetricsAdditionalQuantiles
      Returns:
      a new builder for DataQualityMetricsInputDatasetFacetColumnMetricsAdditionalQuantiles
    • newExternalQueryRunFacet

      public OpenLineage.ExternalQueryRunFacet newExternalQueryRunFacet(String externalQueryId, String source)
      Factory method for ExternalQueryRunFacet
      Parameters:
      externalQueryId - Identifier for the external system
      source - source of the external query
      Returns:
      ExternalQueryRunFacet
    • newExternalQueryRunFacetBuilder

      public OpenLineage.ExternalQueryRunFacetBuilder newExternalQueryRunFacetBuilder()
      Creates a builder for ExternalQueryRunFacet
      Returns:
      a new builder for ExternalQueryRunFacet
    • newOutputDatasetOutputFacets

      public OpenLineage.OutputDatasetOutputFacets newOutputDatasetOutputFacets(OpenLineage.OutputStatisticsOutputDatasetFacet outputStatistics)
      Factory method for OutputDatasetOutputFacets
      Parameters:
      outputStatistics - the outputStatistics
      Returns:
      OutputDatasetOutputFacets
    • newOutputDatasetOutputFacetsBuilder

      public OpenLineage.OutputDatasetOutputFacetsBuilder newOutputDatasetOutputFacetsBuilder()
      Creates a builder for OutputDatasetOutputFacets
      Returns:
      a new builder for OutputDatasetOutputFacets
    • newNominalTimeRunFacet

      public OpenLineage.NominalTimeRunFacet newNominalTimeRunFacet(ZonedDateTime nominalStartTime, ZonedDateTime nominalEndTime)
      Factory method for NominalTimeRunFacet
      Parameters:
      nominalStartTime - An [ISO-8601](https://en.wikipedia.org/wiki/ISO_8601) timestamp representing the nominal start time (included) of the run. AKA the schedule time
      nominalEndTime - An [ISO-8601](https://en.wikipedia.org/wiki/ISO_8601) timestamp representing the nominal end time (excluded) of the run. (Should be the nominal start time of the next run)
      Returns:
      NominalTimeRunFacet
    • newNominalTimeRunFacetBuilder

      public OpenLineage.NominalTimeRunFacetBuilder newNominalTimeRunFacetBuilder()
      Creates a builder for NominalTimeRunFacet
      Returns:
      a new builder for NominalTimeRunFacet
    • newRunFacet

      public OpenLineage.RunFacet newRunFacet()
      Returns:
      RunFacet
    • newOwnershipJobFacetOwners

      public OpenLineage.OwnershipJobFacetOwners newOwnershipJobFacetOwners(String name, String type)
      Factory method for OwnershipJobFacetOwners
      Parameters:
      name - the identifier of the owner of the Job. It is recommended to define this as a URN. For example application:foo, user:jdoe, team:data
      type - The type of ownership (optional)
      Returns:
      OwnershipJobFacetOwners
    • newOwnershipJobFacetOwnersBuilder

      public OpenLineage.OwnershipJobFacetOwnersBuilder newOwnershipJobFacetOwnersBuilder()
      Creates a builder for OwnershipJobFacetOwners
      Returns:
      a new builder for OwnershipJobFacetOwners
    • newGcpLineageJobFacet

      public OpenLineage.GcpLineageJobFacet newGcpLineageJobFacet(String displayName, OpenLineage.GcpLineageJobFacetOrigin origin)
      Factory method for GcpLineageJobFacet
      Parameters:
      displayName - The name of the job to be displayed in the UI
      origin - the origin
      Returns:
      GcpLineageJobFacet
    • newGcpLineageJobFacetBuilder

      public OpenLineage.GcpLineageJobFacetBuilder newGcpLineageJobFacetBuilder()
      Creates a builder for GcpLineageJobFacet
      Returns:
      a new builder for GcpLineageJobFacet
    • newInputDatasetFacet

      public OpenLineage.InputDatasetFacet newInputDatasetFacet()
      Returns:
      InputDatasetFacet
    • newJobEvent

      Factory method for JobEvent
      Parameters:
      eventTime - the time the event occurred at
      job - the job
      inputs - The set of **input** datasets.
      outputs - The set of **output** datasets.
      Returns:
      JobEvent
    • newJobEventBuilder

      public OpenLineage.JobEventBuilder newJobEventBuilder()
      Creates a builder for JobEvent
      Returns:
      a new builder for JobEvent
    • newExtractionErrorRunFacet

      public OpenLineage.ExtractionErrorRunFacet newExtractionErrorRunFacet(Long totalTasks, Long failedTasks, List<OpenLineage.ExtractionErrorRunFacetErrors> errors)
      Factory method for ExtractionErrorRunFacet
      Parameters:
      totalTasks - The number of distinguishable tasks in a run that were processed by OpenLineage, whether successfully or not. Those could be, for example, distinct SQL statements.
      failedTasks - The number of distinguishable tasks in a run that were not processed successfully by OpenLineage. Those could be, for example, distinct SQL statements.
      errors - the errors
      Returns:
      ExtractionErrorRunFacet
    • newExtractionErrorRunFacetBuilder

      public OpenLineage.ExtractionErrorRunFacetBuilder newExtractionErrorRunFacetBuilder()
      Creates a builder for ExtractionErrorRunFacet
      Returns:
      a new builder for ExtractionErrorRunFacet
    • newOwnershipDatasetFacetOwners

      public OpenLineage.OwnershipDatasetFacetOwners newOwnershipDatasetFacetOwners(String name, String type)
      Factory method for OwnershipDatasetFacetOwners
      Parameters:
      name - the identifier of the owner of the Dataset. It is recommended to define this as a URN. For example application:foo, user:jdoe, team:data
      type - The type of ownership (optional)
      Returns:
      OwnershipDatasetFacetOwners
    • newOwnershipDatasetFacetOwnersBuilder

      public OpenLineage.OwnershipDatasetFacetOwnersBuilder newOwnershipDatasetFacetOwnersBuilder()
      Creates a builder for OwnershipDatasetFacetOwners
      Returns:
      a new builder for OwnershipDatasetFacetOwners
    • newColumnLineageDatasetFacetFieldsAdditional

      public OpenLineage.ColumnLineageDatasetFacetFieldsAdditional newColumnLineageDatasetFacetFieldsAdditional(List<OpenLineage.ColumnLineageDatasetFacetFieldsAdditionalInputFields> inputFields, String transformationDescription, String transformationType)
      Factory method for ColumnLineageDatasetFacetFieldsAdditional
      Parameters:
      inputFields - the inputFields
      transformationDescription - a string representation of the transformation applied
      transformationType - IDENTITY|MASKED reflects a clearly defined behavior. IDENTITY: exact same as input; MASKED: no original data available (like a hash of PII for example)
      Returns:
      ColumnLineageDatasetFacetFieldsAdditional
    • newColumnLineageDatasetFacetFieldsAdditionalBuilder

      public OpenLineage.ColumnLineageDatasetFacetFieldsAdditionalBuilder newColumnLineageDatasetFacetFieldsAdditionalBuilder()
      Creates a builder for ColumnLineageDatasetFacetFieldsAdditional
      Returns:
      a new builder for ColumnLineageDatasetFacetFieldsAdditional
    • newDataQualityMetricsInputDatasetFacetColumnMetricsAdditional

      public OpenLineage.DataQualityMetricsInputDatasetFacetColumnMetricsAdditional newDataQualityMetricsInputDatasetFacetColumnMetricsAdditional(Long nullCount, Long distinctCount, Double sum, Double count, Double min, Double max, OpenLineage.DataQualityMetricsInputDatasetFacetColumnMetricsAdditionalQuantiles quantiles)
      Factory method for DataQualityMetricsInputDatasetFacetColumnMetricsAdditional
      Parameters:
      nullCount - The number of null values in this column for the rows evaluated
      distinctCount - The number of distinct values in this column for the rows evaluated
      sum - The total sum of values in this column for the rows evaluated
      count - The number of values in this column
      min - the min
      max - the max
      quantiles - The property key is the quantile. Examples: 0.1 0.25 0.5 0.75 1
      Returns:
      DataQualityMetricsInputDatasetFacetColumnMetricsAdditional
    • newDataQualityMetricsInputDatasetFacetColumnMetricsAdditionalBuilder

      public OpenLineage.DataQualityMetricsInputDatasetFacetColumnMetricsAdditionalBuilder newDataQualityMetricsInputDatasetFacetColumnMetricsAdditionalBuilder()
      Creates a builder for DataQualityMetricsInputDatasetFacetColumnMetricsAdditional
      Returns:
      a new builder for DataQualityMetricsInputDatasetFacetColumnMetricsAdditional
    • newDataQualityMetricsInputDatasetFacetColumnMetrics

      public OpenLineage.DataQualityMetricsInputDatasetFacetColumnMetrics newDataQualityMetricsInputDatasetFacetColumnMetrics()
      Factory method for DataQualityMetricsInputDatasetFacetColumnMetrics
      Returns:
      DataQualityMetricsInputDatasetFacetColumnMetrics
    • newDataQualityMetricsInputDatasetFacetColumnMetricsBuilder

      public OpenLineage.DataQualityMetricsInputDatasetFacetColumnMetricsBuilder newDataQualityMetricsInputDatasetFacetColumnMetricsBuilder()
      Creates a builder for DataQualityMetricsInputDatasetFacetColumnMetrics
      Returns:
      a new builder for DataQualityMetricsInputDatasetFacetColumnMetrics
    • newDataQualityMetricsInputDatasetFacet

      public OpenLineage.DataQualityMetricsInputDatasetFacet newDataQualityMetricsInputDatasetFacet(Long rowCount, Long bytes, Long fileCount, OpenLineage.DataQualityMetricsInputDatasetFacetColumnMetrics columnMetrics)
      Factory method for DataQualityMetricsInputDatasetFacet
      Parameters:
      rowCount - The number of rows evaluated
      bytes - The size in bytes
      fileCount - The number of files evaluated
      columnMetrics - The property key is the column name
      Returns:
      DataQualityMetricsInputDatasetFacet
    • newDataQualityMetricsInputDatasetFacetBuilder

      public OpenLineage.DataQualityMetricsInputDatasetFacetBuilder newDataQualityMetricsInputDatasetFacetBuilder()
      Creates a builder for DataQualityMetricsInputDatasetFacet
      Returns:
      a new builder for DataQualityMetricsInputDatasetFacet
    • newDocumentationJobFacet

      public OpenLineage.DocumentationJobFacet newDocumentationJobFacet(String description)
      Factory method for DocumentationJobFacet
      Parameters:
      description - The description of the job.
      Returns:
      DocumentationJobFacet
    • newDocumentationJobFacetBuilder

      public OpenLineage.DocumentationJobFacetBuilder newDocumentationJobFacetBuilder()
      Creates a builder for DocumentationJobFacet
      Returns:
      a new builder for DocumentationJobFacet
    • newJobFacets

      Factory method for JobFacets
      Parameters:
      jobType - the jobType
      sourceCode - the sourceCode
      ownership - the ownership
      gcp_lineage - the gcp_lineage
      sql - the sql
      sourceCodeLocation - the sourceCodeLocation
      documentation - the documentation
      Returns:
      JobFacets
    • newJobFacetsBuilder

      public OpenLineage.JobFacetsBuilder newJobFacetsBuilder()
      Creates a builder for JobFacets
      Returns:
      a new builder for JobFacets
    • newInputDatasetInputFacets

      Factory method for InputDatasetInputFacets
      Parameters:
      dataQualityAssertions - the dataQualityAssertions
      dataQualityMetrics - the dataQualityMetrics
      Returns:
      InputDatasetInputFacets
    • newInputDatasetInputFacetsBuilder

      public OpenLineage.InputDatasetInputFacetsBuilder newInputDatasetInputFacetsBuilder()
      Creates a builder for InputDatasetInputFacets
      Returns:
      a new builder for InputDatasetInputFacets
    • newDatasetFacet

      public OpenLineage.DatasetFacet newDatasetFacet()
      Returns:
      DatasetFacet
    • newDeletedDatasetFacet

      public OpenLineage.DatasetFacet newDeletedDatasetFacet()
      Returns:
      a deleted DatasetFacet
    • newOwnershipDatasetFacet

      Factory method for OwnershipDatasetFacet
      Parameters:
      owners - The owners of the dataset.
      Returns:
      OwnershipDatasetFacet
    • newOwnershipDatasetFacetBuilder

      public OpenLineage.OwnershipDatasetFacetBuilder newOwnershipDatasetFacetBuilder()
      Creates a builder for OwnershipDatasetFacet
      Returns:
      a new builder for OwnershipDatasetFacet
    • newSQLJobFacet

      public OpenLineage.SQLJobFacet newSQLJobFacet(String query)
      Factory method for SQLJobFacet
      Parameters:
      query - the query
      Returns:
      SQLJobFacet
    • newSQLJobFacetBuilder

      public OpenLineage.SQLJobFacetBuilder newSQLJobFacetBuilder()
      Creates a builder for SQLJobFacet
      Returns:
      a new builder for SQLJobFacet
    • newDatasetFacets

      Factory method for DatasetFacets
      Parameters:
      documentation - the documentation
      dataSource - the dataSource
      version - the version
      schema - the schema
      ownership - the ownership
      storage - the storage
      columnLineage - the columnLineage
      symlinks - the symlinks
      lifecycleStateChange - the lifecycleStateChange
      Returns:
      DatasetFacets
    • newDatasetFacetsBuilder

      public OpenLineage.DatasetFacetsBuilder newDatasetFacetsBuilder()
      Creates a builder for DatasetFacets
      Returns:
      a new builder for DatasetFacets
    • newOwnershipJobFacet

      Factory method for OwnershipJobFacet
      Parameters:
      owners - The owners of the job.
      Returns:
      OwnershipJobFacet
    • newOwnershipJobFacetBuilder

      public OpenLineage.OwnershipJobFacetBuilder newOwnershipJobFacetBuilder()
      Creates a builder for OwnershipJobFacet
      Returns:
      a new builder for OwnershipJobFacet
    • newParentRunFacetJob

      public OpenLineage.ParentRunFacetJob newParentRunFacetJob(String namespace, String name)
      Factory method for ParentRunFacetJob
      Parameters:
      namespace - The namespace containing that job
      name - The unique name for that job within that namespace
      Returns:
      ParentRunFacetJob
    • newParentRunFacetJobBuilder

      public OpenLineage.ParentRunFacetJobBuilder newParentRunFacetJobBuilder()
      Creates a builder for ParentRunFacetJob
      Returns:
      a new builder for ParentRunFacetJob
    • newGcpLineageJobFacetOrigin

      public OpenLineage.GcpLineageJobFacetOrigin newGcpLineageJobFacetOrigin(String sourceType, String name)
      Factory method for GcpLineageJobFacetOrigin
      Parameters:
      sourceType - Type of the source. Possible values can be found in GCP documentation (https://cloud.google.com/data-catalog/docs/reference/data-lineage/rest/v1/projects.locations.processes#SourceType)
      name - If the sourceType isn't CUSTOM, the value of this field should be the GCP resource name of the system that reports lineage. The project and location parts of the resource name must match the project and location of the lineage resource being created. More details in the GCP documentation: https://cloud.google.com/data-catalog/docs/reference/data-lineage/rest/v1/projects.locations.processes#origin
      Returns:
      GcpLineageJobFacetOrigin
    • newGcpLineageJobFacetOriginBuilder

      public OpenLineage.GcpLineageJobFacetOriginBuilder newGcpLineageJobFacetOriginBuilder()
      Creates a builder for GcpLineageJobFacetOrigin
      Returns:
      a new builder for GcpLineageJobFacetOrigin
    • newOutputDataset

      public OpenLineage.OutputDataset newOutputDataset(String namespace, String name, OpenLineage.DatasetFacets facets, OpenLineage.OutputDatasetOutputFacets outputFacets)
      Factory method for OutputDataset
      Parameters:
      namespace - The namespace containing that dataset
      name - The unique name for that dataset within that namespace
      facets - The facets for this dataset
      outputFacets - The output facets for this dataset
      Returns:
      OutputDataset
    • newOutputDatasetBuilder

      public OpenLineage.OutputDatasetBuilder newOutputDatasetBuilder()
      Creates a builder for OutputDataset
      Returns:
      a new builder for OutputDataset
    • newErrorMessageRunFacet

      public OpenLineage.ErrorMessageRunFacet newErrorMessageRunFacet(String message, String programmingLanguage, String stackTrace)
      Factory method for ErrorMessageRunFacet
      Parameters:
      message - A human-readable string representing the error message generated by the observed system
      programmingLanguage - Programming language the observed system uses.
      stackTrace - A language-specific stack trace generated by the observed system
      Returns:
      ErrorMessageRunFacet
    • newErrorMessageRunFacetBuilder

      public OpenLineage.ErrorMessageRunFacetBuilder newErrorMessageRunFacetBuilder()
      Creates a builder for ErrorMessageRunFacet
      Returns:
      a new builder for ErrorMessageRunFacet
    • newJobFacet

      public OpenLineage.JobFacet newJobFacet()
      Returns:
      JobFacet
    • newDeletedJobFacet

      public OpenLineage.JobFacet newDeletedJobFacet()
      Returns:
      a deleted JobFacet
    • newRunFacets

      Factory method for RunFacets
      Parameters:
      errorMessage - the errorMessage
      externalQuery - the externalQuery
      extractionError - the extractionError
      parent - the parent
      nominalTime - the nominalTime
      gcp_dataproc_spark - the gcp_dataproc_spark
      processing_engine - the processing_engine
      Returns:
      RunFacets
    • newRunFacetsBuilder

      public OpenLineage.RunFacetsBuilder newRunFacetsBuilder()
      Creates a builder for RunFacets
      Returns:
      a new builder for RunFacets
    • newSchemaDatasetFacet

      Factory method for SchemaDatasetFacet
      Parameters:
      fields - The fields of the data source.
      Returns:
      SchemaDatasetFacet
    • newSchemaDatasetFacetBuilder

      public OpenLineage.SchemaDatasetFacetBuilder newSchemaDatasetFacetBuilder()
      Creates a builder for SchemaDatasetFacet
      Returns:
      a new builder for SchemaDatasetFacet
    • newSourceCodeJobFacet

      public OpenLineage.SourceCodeJobFacet newSourceCodeJobFacet(String language, String sourceCode)
      Factory method for SourceCodeJobFacet
      Parameters:
      language - Language in which source code of this job was written.
      sourceCode - Source code of this job.
      Returns:
      SourceCodeJobFacet
    • newSourceCodeJobFacetBuilder

      public OpenLineage.SourceCodeJobFacetBuilder newSourceCodeJobFacetBuilder()
      Creates a builder for SourceCodeJobFacet
      Returns:
      a new builder for SourceCodeJobFacet
    • newDocumentationDatasetFacet

      public OpenLineage.DocumentationDatasetFacet newDocumentationDatasetFacet(String description)
      Factory method for DocumentationDatasetFacet
      Parameters:
      description - The description of the dataset.
      Returns:
      DocumentationDatasetFacet
    • newDocumentationDatasetFacetBuilder

      public OpenLineage.DocumentationDatasetFacetBuilder newDocumentationDatasetFacetBuilder()
      Creates a builder for DocumentationDatasetFacet
      Returns:
      a new builder for DocumentationDatasetFacet
    • newSourceCodeLocationJobFacet

      public OpenLineage.SourceCodeLocationJobFacet newSourceCodeLocationJobFacet(String type, URI url, String repoUrl, String path, String version, String tag, String branch)
      Factory method for SourceCodeLocationJobFacet
      Parameters:
      type - the source control system
      url - the full http URL to locate the file
      repoUrl - the URL to the repository
      path - the path in the repo containing the source files
      version - the current version deployed (not a branch name, the actual unique version)
      tag - optional tag name
      branch - optional branch name
      Returns:
      SourceCodeLocationJobFacet
    • newSourceCodeLocationJobFacetBuilder

      public OpenLineage.SourceCodeLocationJobFacetBuilder newSourceCodeLocationJobFacetBuilder()
      Creates a builder for SourceCodeLocationJobFacet
      Returns:
      a new builder for SourceCodeLocationJobFacet
    • newDataQualityAssertionsDatasetFacet

      Factory method for DataQualityAssertionsDatasetFacet
      Parameters:
      assertions - the assertions
      Returns:
      DataQualityAssertionsDatasetFacet
    • newDataQualityAssertionsDatasetFacetBuilder

      public OpenLineage.DataQualityAssertionsDatasetFacetBuilder newDataQualityAssertionsDatasetFacetBuilder()
      Creates a builder for DataQualityAssertionsDatasetFacet
      Returns:
      a new builder for DataQualityAssertionsDatasetFacet
    • newColumnLineageDatasetFacet

      Factory method for ColumnLineageDatasetFacet
      Parameters:
      fields - Column level lineage that maps output fields into input fields used to evaluate them.
      Returns:
      ColumnLineageDatasetFacet
    • newColumnLineageDatasetFacetBuilder

      public OpenLineage.ColumnLineageDatasetFacetBuilder newColumnLineageDatasetFacetBuilder()
      Creates a builder for ColumnLineageDatasetFacet
      Returns:
      a new builder for ColumnLineageDatasetFacet
    • newDatasetEvent

      public OpenLineage.DatasetEvent newDatasetEvent(ZonedDateTime eventTime, OpenLineage.StaticDataset dataset)
      Factory method for DatasetEvent
      Parameters:
      eventTime - the time the event occurred at
      dataset - the dataset
      Returns:
      DatasetEvent
    • newDatasetEventBuilder

      public OpenLineage.DatasetEventBuilder newDatasetEventBuilder()
      Creates a builder for DatasetEvent
      Returns:
      a new builder for DatasetEvent
    • newProcessingEngineRunFacet

      public OpenLineage.ProcessingEngineRunFacet newProcessingEngineRunFacet(String version, String name, String openlineageAdapterVersion)
      Factory method for ProcessingEngineRunFacet
      Parameters:
      version - Processing engine version, for example the Airflow or Spark version.
      name - Processing engine name, e.g. Airflow or Spark
      openlineageAdapterVersion - OpenLineage adapter package version, for example the version of the OpenLineage Airflow integration package
      Returns:
      ProcessingEngineRunFacet
    • newProcessingEngineRunFacetBuilder

      public OpenLineage.ProcessingEngineRunFacetBuilder newProcessingEngineRunFacetBuilder()
      Creates a builder for ProcessingEngineRunFacet
      Returns:
      a new builder for ProcessingEngineRunFacet
    • newGcpDataprocSparkRunFacet

      public OpenLineage.GcpDataprocSparkRunFacet newGcpDataprocSparkRunFacet(String appId, String appName, String batchId, String batchUuid, String clusterName, String clusterUuid, String jobId, String jobUuid, String projectId, String queryNodeName, String sessionId, String sessionUuid)
      Factory method for GcpDataprocSparkRunFacet
      Parameters:
      appId - Application ID set in the spark configuration of the current context. Its format depends on the resource manager.
      appName - App name set in the spark configuration of the current context. It may be provided by the user.
      batchId - Populated only for Dataproc serverless batches. The resource id of the batch.
      batchUuid - Populated only for Dataproc serverless batches. A UUID generated by the service when it creates the batch.
      clusterName - Populated only for Dataproc GCE workloads. The cluster name is unique within a GCP project.
      clusterUuid - Populated only for Dataproc GCE workloads. A UUID generated by the service at the time of cluster creation.
      jobId - Populated only for Dataproc GCE workloads. If not specified by the user, the job ID will be provided by the service.
      jobUuid - Populated only for Dataproc GCE workloads. A UUID that uniquely identifies a job within the project over time.
      projectId - The GCP project ID that the resource belongs to.
      queryNodeName - The name of the query node in the executed Spark Plan. Often used to describe the command being executed.
      sessionId - Populated only for Dataproc serverless interactive sessions. The resource id of the session, used for URL generation.
      sessionUuid - Populated only for Dataproc serverless interactive sessions. A UUID generated by the service when it creates the session.
      Returns:
      GcpDataprocSparkRunFacet
    • newGcpDataprocSparkRunFacetBuilder

      public OpenLineage.GcpDataprocSparkRunFacetBuilder newGcpDataprocSparkRunFacetBuilder()
      Creates a builder for GcpDataprocSparkRunFacet
      Returns:
      a new builder for GcpDataprocSparkRunFacet
    • newLifecycleStateChangeDatasetFacetPreviousIdentifier

      public OpenLineage.LifecycleStateChangeDatasetFacetPreviousIdentifier newLifecycleStateChangeDatasetFacetPreviousIdentifier(String name, String namespace)
      Factory method for LifecycleStateChangeDatasetFacetPreviousIdentifier
      Parameters:
      name - the name
      namespace - the namespace
      Returns:
      LifecycleStateChangeDatasetFacetPreviousIdentifier
    • newLifecycleStateChangeDatasetFacetPreviousIdentifierBuilder

      public OpenLineage.LifecycleStateChangeDatasetFacetPreviousIdentifierBuilder newLifecycleStateChangeDatasetFacetPreviousIdentifierBuilder()
      Creates a builder for LifecycleStateChangeDatasetFacetPreviousIdentifier
      Returns:
      a new builder for LifecycleStateChangeDatasetFacetPreviousIdentifier
    • newJob

      public OpenLineage.Job newJob(String namespace, String name, OpenLineage.JobFacets facets)
      Factory method for Job
      Parameters:
      namespace - The namespace containing that job
      name - The unique name for that job within that namespace
      facets - The job facets.
      Returns:
      Job
    • newJobBuilder

      public OpenLineage.JobBuilder newJobBuilder()
      Creates a builder for Job
      Returns:
      a new builder for Job
    • newDatasetVersionDatasetFacet

      public OpenLineage.DatasetVersionDatasetFacet newDatasetVersionDatasetFacet(String datasetVersion)
      Factory method for DatasetVersionDatasetFacet
      Parameters:
      datasetVersion - The version of the dataset.
      Returns:
      DatasetVersionDatasetFacet
    • newDatasetVersionDatasetFacetBuilder

      public OpenLineage.DatasetVersionDatasetFacetBuilder newDatasetVersionDatasetFacetBuilder()
      Creates a builder for DatasetVersionDatasetFacet
      Returns:
      a new builder for DatasetVersionDatasetFacet
    • newSymlinksDatasetFacetIdentifiers

      public OpenLineage.SymlinksDatasetFacetIdentifiers newSymlinksDatasetFacetIdentifiers(String namespace, String name, String type)
      Factory method for SymlinksDatasetFacetIdentifiers
      Parameters:
      namespace - The dataset namespace
      name - The dataset name
      type - Identifier type
      Returns:
      SymlinksDatasetFacetIdentifiers
    • newSymlinksDatasetFacetIdentifiersBuilder

      public OpenLineage.SymlinksDatasetFacetIdentifiersBuilder newSymlinksDatasetFacetIdentifiersBuilder()
      Creates a builder for SymlinksDatasetFacetIdentifiers
      Returns:
      a new builder for SymlinksDatasetFacetIdentifiers
    • newDataQualityAssertionsDatasetFacetAssertions

      public OpenLineage.DataQualityAssertionsDatasetFacetAssertions newDataQualityAssertionsDatasetFacetAssertions(String assertion, Boolean success, String column)
      Factory method for DataQualityAssertionsDatasetFacetAssertions
      Parameters:
      assertion - Type of expectation test that dataset is subjected to
      success - Whether the expectation test succeeded
      column - Column that expectation is testing. It should match the name provided in SchemaDatasetFacet. If column field is empty, then expectation refers to whole dataset.
      Returns:
      DataQualityAssertionsDatasetFacetAssertions
    • newDataQualityAssertionsDatasetFacetAssertionsBuilder

      public OpenLineage.DataQualityAssertionsDatasetFacetAssertionsBuilder newDataQualityAssertionsDatasetFacetAssertionsBuilder()
      Creates a builder for DataQualityAssertionsDatasetFacetAssertions
      Returns:
      a new builder for DataQualityAssertionsDatasetFacetAssertions
    • newParentRunFacetRun

      public OpenLineage.ParentRunFacetRun newParentRunFacetRun(UUID runId)
      Factory method for ParentRunFacetRun
      Parameters:
      runId - The globally unique ID of the run associated with the job.
      Returns:
      ParentRunFacetRun
    • newParentRunFacetRunBuilder

      public OpenLineage.ParentRunFacetRunBuilder newParentRunFacetRunBuilder()
      Creates a builder for ParentRunFacetRun
      Returns:
      a new builder for ParentRunFacetRun
    • newColumnLineageDatasetFacetFieldsAdditionalInputFields

      public OpenLineage.ColumnLineageDatasetFacetFieldsAdditionalInputFields newColumnLineageDatasetFacetFieldsAdditionalInputFields(String namespace, String name, String field, List<OpenLineage.ColumnLineageDatasetFacetFieldsAdditionalInputFieldsTransformations> transformations)
      Factory method for ColumnLineageDatasetFacetFieldsAdditionalInputFields
      Parameters:
      namespace - The input dataset namespace
      name - The input dataset name
      field - The input field
      transformations - The transformations applied to the input field
      Returns:
      ColumnLineageDatasetFacetFieldsAdditionalInputFields
    • newColumnLineageDatasetFacetFieldsAdditionalInputFieldsBuilder

      public OpenLineage.ColumnLineageDatasetFacetFieldsAdditionalInputFieldsBuilder newColumnLineageDatasetFacetFieldsAdditionalInputFieldsBuilder()
      Creates a builder for ColumnLineageDatasetFacetFieldsAdditionalInputFields
      Returns:
      a new builder for ColumnLineageDatasetFacetFieldsAdditionalInputFields
    • newOutputDatasetFacet

      public OpenLineage.OutputDatasetFacet newOutputDatasetFacet()
      Factory method for OutputDatasetFacet
      Returns:
      OutputDatasetFacet
    • newStaticDataset

      public OpenLineage.StaticDataset newStaticDataset(String namespace, String name, OpenLineage.DatasetFacets facets)
      Factory method for StaticDataset
      Parameters:
      namespace - The namespace containing that dataset
      name - The unique name for that dataset within that namespace
      facets - The facets for this dataset
      Returns:
      StaticDataset
    • newStaticDatasetBuilder

      public OpenLineage.StaticDatasetBuilder newStaticDatasetBuilder()
      Creates a builder for StaticDataset
      Returns:
      a new builder for StaticDataset
    • newColumnLineageDatasetFacetFields

      public OpenLineage.ColumnLineageDatasetFacetFields newColumnLineageDatasetFacetFields()
      Factory method for ColumnLineageDatasetFacetFields
      Returns:
      ColumnLineageDatasetFacetFields
    • newColumnLineageDatasetFacetFieldsBuilder

      public OpenLineage.ColumnLineageDatasetFacetFieldsBuilder newColumnLineageDatasetFacetFieldsBuilder()
      Creates a builder for ColumnLineageDatasetFacetFields
      Returns:
      a new builder for ColumnLineageDatasetFacetFields
    • newSymlinksDatasetFacet

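      public OpenLineage.SymlinksDatasetFacet newSymlinksDatasetFacet(List<OpenLineage.SymlinksDatasetFacetIdentifiers> identifiers)
      Factory method for SymlinksDatasetFacet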
      Parameters:
      identifiers - the identifiers
      Returns:
      SymlinksDatasetFacet
    • newSymlinksDatasetFacetBuilder

      public OpenLineage.SymlinksDatasetFacetBuilder newSymlinksDatasetFacetBuilder()
      Creates a builder for SymlinksDatasetFacet
      Returns:
      a new builder for SymlinksDatasetFacet
    • newStorageDatasetFacet

      public OpenLineage.StorageDatasetFacet newStorageDatasetFacet(String storageLayer, String fileFormat)
      Factory method for StorageDatasetFacet
      Parameters:
      storageLayer - Storage layer provider with allowed values: iceberg, delta.
      fileFormat - File format with allowed values: parquet, orc, avro, json, csv, text, xml.
      Returns:
      StorageDatasetFacet
    • newStorageDatasetFacetBuilder

      public OpenLineage.StorageDatasetFacetBuilder newStorageDatasetFacetBuilder()
      Creates a builder for StorageDatasetFacet
      Returns:
      a new builder for StorageDatasetFacet
    • newColumnLineageDatasetFacetFieldsAdditionalInputFieldsTransformations

      public OpenLineage.ColumnLineageDatasetFacetFieldsAdditionalInputFieldsTransformations newColumnLineageDatasetFacetFieldsAdditionalInputFieldsTransformations(String type, String subtype, String description, Boolean masking)
      Factory method for ColumnLineageDatasetFacetFieldsAdditionalInputFieldsTransformations
      Parameters:
      type - The type of the transformation. Allowed values are: DIRECT, INDIRECT
      subtype - The subtype of the transformation
      description - A string representation of the transformation applied
      masking - Whether the transformation masks the data
      Returns:
      ColumnLineageDatasetFacetFieldsAdditionalInputFieldsTransformations
    • newColumnLineageDatasetFacetFieldsAdditionalInputFieldsTransformationsBuilder

      public OpenLineage.ColumnLineageDatasetFacetFieldsAdditionalInputFieldsTransformationsBuilder newColumnLineageDatasetFacetFieldsAdditionalInputFieldsTransformationsBuilder()
      Creates a builder for ColumnLineageDatasetFacetFieldsAdditionalInputFieldsTransformations
      Returns:
      a new builder for ColumnLineageDatasetFacetFieldsAdditionalInputFieldsTransformations
    • newInputDataset

      public OpenLineage.InputDataset newInputDataset(String namespace, String name, OpenLineage.DatasetFacets facets, OpenLineage.InputDatasetInputFacets inputFacets)
      Factory method for InputDataset
      Parameters:
      namespace - The namespace containing that dataset
      name - The unique name for that dataset within that namespace
      facets - The facets for this dataset
      inputFacets - The input facets for this dataset.
      Returns:
      InputDataset
    • newInputDatasetBuilder

      public OpenLineage.InputDatasetBuilder newInputDatasetBuilder()
      Creates a builder for InputDataset
      Returns:
      a new builder for InputDataset
    • newDatasourceDatasetFacet

      public OpenLineage.DatasourceDatasetFacet newDatasourceDatasetFacet(String name, URI uri)
      Factory method for DatasourceDatasetFacet
      Parameters:
      name - the name
      uri - the uri
      Returns:
      DatasourceDatasetFacet
    • newDatasourceDatasetFacetBuilder

      public OpenLineage.DatasourceDatasetFacetBuilder newDatasourceDatasetFacetBuilder()
      Creates a builder for DatasourceDatasetFacet
      Returns:
      a new builder for DatasourceDatasetFacet
    • newLifecycleStateChangeDatasetFacet

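      public OpenLineage.LifecycleStateChangeDatasetFacet newLifecycleStateChangeDatasetFacet(OpenLineage.LifecycleStateChangeDatasetFacet.LifecycleStateChange lifecycleStateChange, OpenLineage.LifecycleStateChangeDatasetFacetPreviousIdentifier previousIdentifier)
      Factory method for LifecycleStateChangeDatasetFacet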
      Parameters:
      lifecycleStateChange - The lifecycle state change.
      previousIdentifier - Previous name of the dataset in case of renaming it.
      Returns:
      LifecycleStateChangeDatasetFacet
    • newLifecycleStateChangeDatasetFacetBuilder

      public OpenLineage.LifecycleStateChangeDatasetFacetBuilder newLifecycleStateChangeDatasetFacetBuilder()
      Creates a builder for LifecycleStateChangeDatasetFacet
      Returns:
      a new builder for LifecycleStateChangeDatasetFacet