Data Referencing Feature

Table of Contents

Description

Besides validating the data type and value restrictions for a point within the data structure, nadap provides the ability to verify that:

  • a value is unique within the data structure (even at other points in the data schema),
  • a value matches values at other points in the data schema (consumer -> producer) and
  • consuming values do not, in total, consume more than the producers create (by defining credits).

In addition, all those reference tests happen in a namespace. Multiple data sets can be validated in the same namespace, so values can be referenced among them. The namespace can also be switched in order to validate new data within it without direct referencing to the previously validated data. For further control over referencing, you can define the scope of a reference to be global (across all namespaces) or limited to the current namespace. For example, some values must be globally unique while other values must be unique only within the current namespace.

The String data type provides a special feature called *Namespace Lookup*. By defining a namespace separator character, the string can be split into a namespace name and a string value. If the separator character is present in the data string, the referencing engine will validate/reference the value in the given namespace instead of the current one.

The reference option keys are documented within the data type documentation.

Examples

Uniqueness Validation

# Example: check that every person 'id' in a list is unique.
import yaml
import nadap

# Schema: a list of dicts. Each 'id' carries the reference key 'person_id'.
# NOTE(review): the short form `reference: <key>` presumably selects the
# default (uniqueness) mode - confirm in the data type documentation.
schema_definition_yaml = """
root:
  type: list
  elements:
    type: dict
    keys:
        id:
          type: int
          reference: person_id
        name: str
"""

# Correct data
data1_yaml = """
- id: 1
  name: Nadap
- id: 2
  name: Other
- id: 3
  name: Unkown
"""

# 'id' of 'Other' is not unique; used by 'Nadap'
data2_yaml = """
- id: 1
  name: Nadap
- id: 1
  name: Other
"""

# SafeLoader only builds plain Python objects from the YAML text.
schema_def = yaml.load(schema_definition_yaml, Loader=yaml.SafeLoader)
n = nadap.Nadap()
n.schema = schema_def

# Validate the correct data set; findings are printed only if validation
# raises DataValidationError.
data1 = yaml.load(data1_yaml, Loader=yaml.SafeLoader)
try:
    n.validate(data1)
except nadap.DataValidationError:
    print("Data1 fails:")
    for finding in n.findings:
        print(finding)

# Recreate a Nadap instance to clear referencing cache
n = nadap.Nadap()
n.schema = schema_def
data2 = yaml.load(data2_yaml, Loader=yaml.SafeLoader)
try:
    n.validate(data2)
except nadap.DataValidationError:
    print("Data2 fails:")
    for finding in n.findings:
        print(finding)
else:
    # No exception was raised, but findings may still have been collected.
    # According to the documented output of this example, the duplicate
    # 'id' is reported through this branch.
    if n.findings:
        print("Data2 referencing fails:")
        for finding in n.findings:
            print(finding)

Will print this output:

Data2 referencing fails:
[1].id: Reference already defined at [0].id

Consumer/Producer Validation

# Example: check that every consumed value has a matching producer.
import yaml
import nadap

# Schema: two string lists sharing the reference key 'ref'.
# Elements of 'producers' use mode 'producer', elements of 'consumers'
# use mode 'consumer'.
schema_definition_yaml = """
root:
  type: dict
  keys:
    producers:
        type: list
        elements:
          type: str
          reference:
            key: ref
            mode: producer
    consumers:
        type: list
        elements:
          type: str
          reference:
            key: ref
            mode: consumer
"""

# 'value3' appears under 'consumers' but not under 'producers'.
data_yaml = """
producers:
  - value1
  - value2
consumers:
  - value1
  - value1
  - value2
  - value3
"""

# SafeLoader only builds plain Python objects from the YAML text.
schema_def = yaml.load(schema_definition_yaml, Loader=yaml.SafeLoader)
n = nadap.Nadap()
n.schema = schema_def

data = yaml.load(data_yaml, Loader=yaml.SafeLoader)
try:
    n.validate(data)
except nadap.DataValidationError:
    print("Data fails:")
    for finding in n.findings:
        print(finding)
else:
    # No exception was raised, but findings may still have been collected.
    # According to the documented output of this example, the missing
    # producer for 'value3' is reported through this branch.
    if n.findings:
        print("Data referencing fails:")
        for finding in n.findings:
            print(finding)

Will print this output:

Data referencing fails:
.consumers[3]: No producer found

Consumer/Producer Validation with Credits

# Example: consumer/producer matching with an additional credit balance.
import yaml
import nadap

# Schema: same layout as the plain consumer/producer example, but both the
# producer and the consumer reference define `credits: 10`.
# NOTE(review): presumably each producer entry grants 10 credits and each
# consumer entry takes 10 credits - confirm in the data type documentation.
schema_definition_yaml = """
root:
  type: dict
  keys:
    producers:
        type: list
        elements:
          type: str
          reference:
            key: ref
            mode: producer
            credits: 10
    consumers:
        type: list
        elements:
          type: str
          reference:
            key: ref
            mode: consumer
            credits: 10
"""

# 'value1' is produced once but consumed twice; 'value3' is consumed but
# never produced.
data_yaml = """
producers:
  - value1
  - value2
consumers:
  - value1
  - value1
  - value2
  - value3
"""

# SafeLoader only builds plain Python objects from the YAML text.
schema_def = yaml.load(schema_definition_yaml, Loader=yaml.SafeLoader)
n = nadap.Nadap()
n.schema = schema_def

data = yaml.load(data_yaml, Loader=yaml.SafeLoader)
try:
    n.validate(data)
except nadap.DataValidationError:
    print("Data fails:")
    for finding in n.findings:
        print(finding)
else:
    # No exception was raised, but findings may still have been collected.
    # According to the documented output of this example, the credit and
    # missing-producer findings are reported through this branch.
    if n.findings:
        print("Data referencing fails:")
        for finding in n.findings:
            print(finding)

Will print this output:

Data referencing fails:
n.a.: Global reference key 'ref': Consumer credits exceeds producer credits for value 'value1'
.consumers[3]: No producer found
n.a.: Namespace '' reference key 'ref': Consumer credits exceeds producer credits for value 'value1'

Namespace-Aware Uniqueness Validation

# Example: uniqueness that is enforced per namespace instead of globally.
import yaml
import nadap

# Schema: a list of dicts. Each 'id' uses the reference key 'person_id'
# with `unique_scope: namespace`.
schema_definition_yaml = """
root:
  type: list
  elements:
    type: dict
    keys:
        id:
          type: int
          reference:
            key: person_id
            unique_scope: namespace
        name: str
"""

# Three persons with distinct ids.
data1_yaml = """
- id: 1
  name: Nadap
- id: 2
  name: Other
- id: 3
  name: Unkown
"""
# Re-uses id 1, which is also present in data1.
data2_yaml = """
- id: 1
  name: New_Nadap
"""

# SafeLoader only builds plain Python objects from the YAML text.
schema_def = yaml.load(schema_definition_yaml, Loader=yaml.SafeLoader)
n = nadap.Nadap()
n.schema = schema_def
n.switch_namespace("ns1")

data1 = yaml.load(data1_yaml, Loader=yaml.SafeLoader)
try:
    # Validate the same data in 'ns1' and then again in 'ns2'. The documented
    # output of this example lists no finding for data1.
    n.validate(data1)
    n.switch_namespace("ns2")
    n.validate(data1)
except nadap.DataValidationError:
    print("Data1 fails:")
    for finding in n.findings:
        print(finding)

data2 = yaml.load(data2_yaml, Loader=yaml.SafeLoader)
try:
    # Test data2 in namespace 'ns2'
    n.validate(data2)
except nadap.DataValidationError:
    print("Data2 fails:")
    for finding in n.findings:
        print(finding)
else:
    # No exception was raised, but findings may still have been collected.
    # According to the documented output of this example, the id clash
    # inside 'ns2' is reported through this branch.
    if n.findings:
        print("Data2 referencing fails:")
        for finding in n.findings:
            print(finding)

Will print this output:

Data2 referencing fails:
[0].id: Reference already defined at [0].id

Namespace-Lookup Validation

# Example: consumers that address a producer in another namespace through
# a "<namespace><separator><value>" string.
import yaml
import nadap

# Schema 1: a list of strings carrying the reference key 'person_name'.
# NOTE(review): the short form `reference: <key>` presumably selects the
# default mode - confirm in the data type documentation.
schema1_definition_yaml = """
root:
  type: list
  elements:
    type: str
    reference: person_name
"""
# Schema 2: a list of strings that consume 'person_name' within a namespace.
# '.' is defined as the namespace separator character.
schema2_definition_yaml = """
root:
  type: list
  elements:
    type: str
    reference:
      key: person_name
      mode: consumer
      consumer_scope: namespace
      namespace_separator_char: '.'
"""

# SafeLoader only builds plain Python objects from the YAML text.
schema_def = yaml.load(schema1_definition_yaml, Loader=yaml.SafeLoader)
n = nadap.Nadap()
n.schema = schema_def
n.switch_namespace("ns1")

# Validate two names against schema 1 while namespace 'ns1' is active.
try:
    n.validate(
        [
            "nadap",
            "rulez!"
        ]
    )
except nadap.DataValidationError:
    print("Data1 fails:")
    for finding in n.findings:
        print(finding)

# Switch to 'ns2' and to the consumer schema on the same Nadap instance.
n.switch_namespace("ns2")
schema_def = yaml.load(schema2_definition_yaml, Loader=yaml.SafeLoader)
n.schema = schema_def
try:
    # Each string names a namespace before the '.' separator. 'test' was not
    # among the values validated in 'ns1', and 'nsXX' was never switched to.
    n.validate(
        [
            "ns1.nadap",
            "ns1.test",
            "nsXX.test"
        ]
    )
except nadap.DataValidationError:
    print("Data2 fails:")
    for finding in n.findings:
        print(finding)
else:
    # No exception was raised, but findings may still have been collected.
    # According to the documented output of this example, the two failed
    # lookups are reported through this branch.
    if n.findings:
        print("Data2 referencing fails:")
        for finding in n.findings:
            print(finding)

Will print this output:

Data2 referencing fails:
ns2 > [1]: No producer found in 'ns1'
ns2 > [2]: No producer found in 'nsXX'