# ndarray-1.0.0.yaml
%YAML 1.1
---
$schema: "http://stsci.edu/schemas/asdf/asdf-schema-1.0.0"
id: "http://stsci.edu/schemas/asdf/core/ndarray-1.0.0"
tag: "tag:stsci.edu:asdf/core/ndarray-1.0.0"

title: >
  An *n*-dimensional array.

description: |
  There are two ways to store the data in an ndarray.

  - Inline in the tree: This is recommended only for small arrays.  In
    this case, the entire ``ndarray`` tag may be a nested list, in
    which case the type of the array is inferred from the content.
    (See the rules for type inference in the ``inline-data``
    definition below.)  The inline data may also be given in the
    ``data`` property, in which case it is possible to explicitly
    specify the ``datatype`` and other properties.

  - External to the tree: The data comes from a [block](ref:block)
    within the same ASDF file or an external ASDF file referenced by a
    URI.

# Each example is a two-item list: a short prose description, followed by the
# example document itself as a literal block scalar.
examples:
  -
    - An inline array, with implicit data type
    - |
        !core/ndarray-1.0.0
          [[1, 0, 0],
           [0, 1, 0],
           [0, 0, 1]]

  -
    - An inline array, with an explicit data type
    - |
        !core/ndarray-1.0.0
          datatype: float64
          data:
            [[1, 0, 0],
             [0, 1, 0],
             [0, 0, 1]]

  -
    - An inline structured array, where the types of each column are
      automatically detected
    - |
        !core/ndarray-1.0.0
          [[M110, 110, 205, And],
           [ M31,  31, 224, And],
           [ M32,  32, 221, And],
           [M103, 103, 581, Cas]]

  -
    - An inline structured array, where the types of each column are
      explicitly specified
    - |
        !core/ndarray-1.0.0
          datatype: [['ascii', 4], uint16, uint16, ['ascii', 4]]
          data:
            [[M110, 110, 205, And],
             [ M31,  31, 224, And],
             [ M32,  32, 221, And],
             [M103, 103, 581, Cas]]

  -
    - A double-precision array, in contiguous memory in a block within
      the same file
    - |
        !core/ndarray-1.0.0
          source: 0
          shape: [1024, 1024]
          datatype: float64
          byteorder: little

  -
    - A view of a tile in that image
    - |
        !core/ndarray-1.0.0
          source: 0
          shape: [256, 256]
          datatype: float64
          byteorder: little
          strides: [8192, 8]
          offset: 2099200

  -
    - A structured datatype, with nested columns for a coordinate in
      (*ra*, *dec*), and a 3x3 convolution kernel
    - |
        !core/ndarray-1.0.0
          source: 0
          shape: [64]
          datatype:
            - name: coordinate
              datatype:
                - name: ra
                  datatype: float64
                - name: dec
                  datatype: float64
            - name: kernel
              datatype: float32
              shape: [3, 3]
          byteorder: little

  # Fortran (column-major) order: the first axis varies fastest, so its
  # stride is one 8-byte element and the second axis steps over 1024 * 8
  # bytes.  [8192, 8] would describe the default C-order layout instead.
  -
    - An array in Fortran order
    - |
        !core/ndarray-1.0.0
          source: 0
          shape: [1024, 1024]
          datatype: float64
          byteorder: little
          strides: [8, 8192]

  -
    - An array where values of -999 are treated as missing
    - |
        !core/ndarray-1.0.0
          source: 0
          shape: [256, 256]
          datatype: float64
          byteorder: little
          mask: -999

  -
    - An array where another array is used as a mask
    - |
        !core/ndarray-1.0.0
          source: 0
          shape: [256, 256]
          datatype: float64
          byteorder: little
          mask: !core/ndarray-1.0.0
            source: 1
            shape: [256, 256]
            datatype: bool8
            byteorder: little

  -
    - An array where the data is stored in the first block in
      another ASDF file.
    - |
        !core/ndarray-1.0.0
          source: external.asdf
          shape: [256, 256]
          datatype: float64
          byteorder: little

definitions:
  # Schema for the type of one array element: either a named numeric or
  # boolean type, or a fixed-length string given as a [kind, length] pair.
  scalar-datatype:
    description: |
      Describes the type of a single element.

      There is a set of numeric types, each with a single identifier:

      - `int8`, `int16`, `int32`, `int64`: Signed integer types, with
        the given bit size.

      - `uint8`, `uint16`, `uint32`, `uint64`: Unsigned integer types,
        with the given bit size.

      - `float32`: Single-precision floating-point type or "binary32",
        as defined in IEEE 754.

      - `float64`: Double-precision floating-point type or "binary64",
        as defined in IEEE 754.

      - `complex64`: Complex number where the real and imaginary parts
        are each single-precision floating-point ("binary32") numbers,
        as defined in IEEE 754.

      - `complex128`: Complex number where the real and imaginary
        parts are each double-precision floating-point ("binary64")
        numbers, as defined in IEEE 754.

      There is also a boolean type, identified by `bool8`.

      There are two distinct fixed-length string types, which must
      be indicated with a 2-element array where the first element is an
      identifier for the string type, and the second is a length:

      - `ascii`: A string containing ASCII text (all codepoints <
        128), where each character is 1 byte.

      - `ucs4`: A string containing unicode text in the UCS-4
        encoding, where each character is always 4 bytes long.  Here
        the number of bytes used is 4 times the given length.

    anyOf:
      - type: string
        enum: [int8, uint8, int16, uint16, int32, uint32, int64, uint64,
               float32, float64, complex64, complex128, bool8]
      - type: array
        items:
          - type: string
            enum: [ascii, ucs4]
          - type: integer
            minimum: 0
        # `minItems`/`maxItems` bound the number of entries in an array.
        # `minLength`/`maxLength` only constrain strings, so they left the
        # required 2-element length of this pair unchecked.
        minItems: 2
        maxItems: 2

  # Schema for an array's element format: a single scalar datatype, or a list
  # of fields.  A field is either a bare scalar datatype or an object whose
  # `datatype` refers back to this definition, which allows nesting.
  datatype:
    description: |
      The data format of the array elements.  May be a single scalar
      datatype, or may be a nested list of datatypes.  When a list, each field
      may have a name.
    anyOf:
      - $ref: "#/definitions/scalar-datatype"
      - type: array
        items:
          anyOf:
            - $ref: "#/definitions/scalar-datatype"
            - type: object
              properties:
                name:
                  type: string
                  # Anchored with ^...$: JSON Schema patterns match anywhere
                  # in the string unless anchored, so the bare expression
                  # accepted any name containing a letter or underscore.
                  pattern: "^[A-Za-z_][A-Za-z0-9_]*$"
                  description: The name of the field
                datatype:
                  $ref: "#/definitions/datatype"
                byteorder:
                  type: string
                  enum: [big, little]
                  description: |
                    The byteorder for the field.  If not provided, the
                    byteorder of the datatype as a whole will be used.
                shape:
                  type: array
                  items:
                    type: integer
                    minimum: 0
              # Only `datatype` is mandatory; `name`, `byteorder` and `shape`
              # are optional per field.
              required: [datatype]

  # Schema for array contents written directly in the YAML tree as
  # (possibly nested) lists.
  inline-data:
    description: |
      Inline data is stored in YAML format directly in the tree, rather than
      referencing a binary block.  It is made out of nested lists.

      If the datatype of the array is not specified, it is inferred from
      the array contents.  Type inference is supported only for
      homogeneous arrays, not tables.

      - If any of the elements in the array are YAML strings, the
        `datatype` of the entire array is `ucs4`, with the width of
        the largest string in the array, otherwise...

      - If any of the elements in the array are complex numbers, the
        `datatype` of the entire array is `complex128`, otherwise...

      - If any of the elements in the array are numbers with a decimal
        point, the `datatype` of the entire array is `float64`,
        otherwise...

      - If any of the elements in the array are integers, the `datatype`
        of the entire array is `int64`, otherwise...

      - The `datatype` of the entire array is `bool8`.

      Masked values may be included in the array using `null`.  If an
      explicit mask array is also provided, it takes precedence.

    type: array
    items:
      # Elements are scalars, `null` (a masked value), or further nested
      # lists via the recursive reference to this definition.
      anyOf:
        - type: number
        - type: string
        - type: "null"
        - $ref: "complex-1.0.0"
        - $ref: "#/definitions/inline-data"
        - type: boolean

anyOf:
  - $ref: "#/definitions/inline-data"
  - type: object
    properties:
      # Where the array's data lives: a block index within this file
      # (integer) or the URI of an external ASDF file (string).
      source:
        description: |
          The source of the data.

          - If an integer: If non-negative, the zero-based index of the
            block within the same file. If negative, the index from
            the last block within the same file.  For example, a
            source of `-1` corresponds to the last block in the same
            file.

          - If a string, a URI to an external ASDF file containing the
            block data.  Relative URIs and ``file:`` and ``http:``
            protocols must be supported.  Other protocols may be supported
            by specific library implementations.

          The ability to reference block data in an external ASDF file
          is intentionally limited to the first block in the external
          ASDF file, and is intended only to support the needs of
          [exploded](ref:exploded).  For the more general case of
          referencing data in an external ASDF file, use tree
          [references](ref:references).

        anyOf:
          - type: integer
          - type: string
            format: uri

      # Inline array contents, validated by the `inline-data` definition.
      data:
        $ref: '#/definitions/inline-data'
        description: |
          The data for the array inline.

          If `datatype` and/or `shape` are also provided, they must
          match the data here and can be used as a consistency check.
          `strides`, `offset` and `byteorder` are meaningless when
          `data` is provided.

      # Array dimensions.  Note that the schema accepts `*` at any position;
      # the "first entry only" rule is stated in the description, not enforced
      # by the validation keywords here.
      shape:
        type: array
        items:
          anyOf:
            - {type: integer, minimum: 0}
            - enum: ["*"]
        description: |
          The shape of the array.

          The first entry may be the string `*`, indicating that the
          length of the first index of the array will be automatically
          determined from the size of the block.  This is used for
          streaming support.

      # Element format; delegates to the shared `datatype` definition.
      datatype:
        $ref: '#/definitions/datatype'
        description: |
          The data format of the array elements.

      # Endianness of the stored array data; one of the two listed values.
      byteorder:
        type: string
        enum:
          - big
          - little
        description: >
          The byte order (big- or little-endian) of the array data.

      # Byte offset at which this view begins; must not be negative and
      # declares a default of 0.
      offset:
        description: >
          The offset, in bytes, within the data for the start of this
          view.
        type: integer
        minimum: 0
        default: 0

      # Per-dimension byte steps used to walk the array data.
      strides:
        description: >
          The number of bytes to skip in each dimension.  If not provided,
          the array is assumed to be contiguous and in C order.  If
          provided, must be the same length as the shape property.
        type: array
        items:
          # Any non-zero integer: positive and negative strides are both
          # accepted, a stride of 0 is not.
          anyOf:
            - type: integer
              minimum: 1
            - type: integer
              maximum: -1

      # How missing values are marked: a sentinel scalar (real or complex)
      # or a separate ndarray acting as a mask.
      mask:
        description: >
          Describes how missing values in the array are stored.  If a
          scalar number, that number is used to represent missing values.
          If an ndarray, the given array provides a mask, where non-zero
          values represent missing values in this array.  The mask array
          must be broadcastable to the dimensions of this array.
        anyOf:
          - type: number
          - $ref: "complex-1.0.0"
          - allOf:
              - $ref: "ndarray-1.0.0"
              # NOTE(review): `datatype` is not a standard JSON Schema
              # keyword; presumably an ASDF schema extension restricting the
              # mask array's datatype -- confirm against the metaschema.
              - datatype: bool8

    # When `source` is present, `shape`, `datatype` and `byteorder` must be
    # present as well.
    dependencies:
      source:
        - shape
        - datatype
        - byteorder

    # NOTE(review): `propertyOrder` is not a standard JSON Schema keyword;
    # presumably it fixes the order in which properties are written out --
    # confirm against the ASDF metaschema.
    propertyOrder:
      - source
      - data
      - mask
      - datatype
      - byteorder
      - shape
      - offset
      - strides