Compare revisions

Target project: med-team/augur

Changes are shown as if the source revision was being merged into the target revision.

Commits on Source (5)
......@@ -54,23 +54,4 @@ jobs:
     needs: [test]
     runs-on: ubuntu-latest
     steps:
-      - env:
-          TRAVIS_AUTH_TOKEN: ${{ secrets.TRAVIS_AUTH_TOKEN }}
-        run: |
-          set -euo pipefail
-
-          echo "Pinging Travis CI to rebuild Docker image"
-
-          body='{
-            "request": {
-              "branch": "master",
-              "message": "Build triggered from augur"
-            }
-          }'
-
-          curl -X POST https://api.travis-ci.com/repo/nextstrain%2Fdocker-base/requests \
-            -H "Content-Type: application/json" \
-            -H "Accept: application/json" \
-            -H "Travis-API-Version: 3" \
-            -H "Authorization: token $TRAVIS_AUTH_TOKEN" \
-            -d "$body"
+      - run: gh workflow run ci.yml --repo nextstrain/docker-base
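The replacement step relies on the GitHub CLI, which is preinstalled on GitHub-hosted runners. As a hedged sketch, the same dispatch can be reproduced from a local shell, assuming `gh` is authenticated with a token allowed to trigger workflows in nextstrain/docker-base:

```bash
# Hypothetical local equivalent of the new one-line CI step.
# --ref selects the branch whose workflow definition runs (master assumed).
gh workflow run ci.yml --repo nextstrain/docker-base --ref master

# Optionally confirm that a run was created:
gh run list --repo nextstrain/docker-base --workflow ci.yml --limit 1
```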
name: Publish to PyPI
on:
  workflow_dispatch:
    inputs:
      version:
        description: 'New version X.X.X'
        required: true
        type: string
      pypi_instance:
        # PyPI has a separate instance which can be used for testing purposes.
        description: 'PyPI instance for publishing'
        required: true
        default: 'PyPI'
        type: choice
        options:
          - 'TestPyPI'
          - 'PyPI'
jobs:
  run:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
        with:
          # Fetch all branches and tags.
          fetch-depth: 0
      - uses: actions/setup-python@v2
        with:
          python-version: '3.10'
      - name: Set Nextstrain bot as git user
        run: |
          git config --global user.email "hello@nextstrain.org"
          git config --global user.name "Nextstrain bot"
      - run: python3 -m pip install --upgrade build twine
      - run: devel/release ${{ github.event.inputs.version }}
      - run: devel/test
      - uses: actions/upload-artifact@v3
        with:
          name: dist
          path: dist/
      - run: git push origin master release tag ${{ github.event.inputs.version }}
      - name: 'Publish to TestPyPI'
        if: ${{ github.event.inputs.pypi_instance == 'TestPyPI' }}
        run: twine upload dist/*
        env:
          TWINE_USERNAME: __token__
          TWINE_PASSWORD: ${{ secrets.TEST_PYPI_API_TOKEN }}
          TWINE_REPOSITORY_URL: https://test.pypi.org/legacy/
      - name: 'Publish to PyPI'
        if: ${{ github.event.inputs.pypi_instance == 'PyPI' }}
        run: twine upload dist/*
        env:
          TWINE_USERNAME: __token__
          TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
          TWINE_REPOSITORY_URL: https://upload.pypi.org/legacy/
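The workflow above is normally dispatched from the GitHub web UI (see the release steps later in this diff), but `workflow_dispatch` inputs can also be supplied from the command line. A sketch, assuming the file lands at `.github/workflows/release.yaml` and `gh` is authenticated:

```bash
# Dispatch the publish workflow with its two declared inputs; the version
# number here is illustrative.
gh workflow run release.yaml --repo nextstrain/augur --ref master \
  -f version=15.0.2 \
  -f pypi_instance=TestPyPI
```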
......@@ -3,6 +3,22 @@
## __NEXT__
## 15.0.2 (5 May 2022)
### Bug Fixes
* docs: Fix API documentation rendering and add page for `io` module [#896][] (@joverlee521)
* CI: Use GitHub Actions for release process [#904][] (@victorlin)
* utils: Fix branch length annotations in `json_to_tree` function [#908][] (@huddlej)
* export v2: Use io.read_metadata during export, fixing a bug caused when the user's input metadata does not have any valid strain id columns [#909][] (@huddlej)
* CI: Call new GitHub Actions workflow to rebuild images [#910][] (@victorlin)
[#910]: https://github.com/nextstrain/augur/pull/910
[#909]: https://github.com/nextstrain/augur/pull/909
[#908]: https://github.com/nextstrain/augur/pull/908
[#904]: https://github.com/nextstrain/augur/pull/904
[#896]: https://github.com/nextstrain/augur/pull/896
## 15.0.1 (25 April 2022)
### Bug Fixes
......
-__version__ = '15.0.1'
+__version__ = '15.0.2'
def is_augur_version_compatible(version):
......
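This one-constant bump is what the CLI ends up reporting. A quick post-release check (package name per the `nextstrain_augur.egg-info` reference further down; output illustrative):

```bash
python3 -m pip install --upgrade nextstrain-augur==15.0.2
augur --version   # expected: augur 15.0.2
```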
......@@ -266,6 +266,7 @@ def strip_non_reference(aln, reference, insertion_csv=None):
list
list of trimmed sequences, effectively a multiple alignment
Tests
-----
>>> [s.name for s in strip_non_reference(read_alignment("tests/data/align/test_aligned_sequences.fasta"), "with_gaps")]
......
......@@ -18,21 +18,21 @@ def read_in_clade_definitions(clade_file):
Inheritance is allowed, but needs to be acyclic. Alleles can be overwritten by inheriting clades.
Sites are 1 indexed in the file, and are converted to 0 indexed in the output
Empty lines are ignored, comments after # are ignored
-    Format
-    ------
-    clade gene site alt
-    Clade_1 ctpE 81 D
-    Clade_2 nuc 30642 T
-    Clade_3 nuc 444296 A
-    Clade_3 S 1 P
-    \\# Clade_4 inherits from Clade_3
-    Clade_4 clade Clade_3
-    Clade_4 pks8 634 T
-    \\# Inherited allele can be overwritten
-    Clade_4 S 1 L
+    Format::
+
+        clade gene site alt
+        Clade_1 ctpE 81 D
+        Clade_2 nuc 30642 T
+        Clade_3 nuc 444296 A
+        Clade_3 S 1 P
+        # Clade_4 inherits from Clade_3
+        Clade_4 clade Clade_3
+        Clade_4 pks8 634 T
+        # Inherited allele can be overwritten
+        Clade_4 S 1 L
Parameters
----------
......@@ -74,14 +74,14 @@ def read_in_clade_definitions(clade_file):
# This way all clades can be reached by traversal
for clade in df.clade.unique():
G.add_edge(root, clade)
# Build inheritance graph
# For clades that inherit, disconnect from root
# Add edge from parent
for _, row in clade_inheritance_rows.iterrows():
G.remove_edge(root, row.clade)
G.add_edge(row.site, row.clade)
if not nx.is_directed_acyclic_graph(G):
raise ValueError(f"Clade definitions contain cycles {list(nx.simple_cycles(G))}")
......@@ -89,7 +89,7 @@ def read_in_clade_definitions(clade_file):
# Topological sort ensures parents are visited before children
# islice is used to skip the root node (which has no parent)
for clade in islice(nx.topological_sort(G),1,None):
# Get name of parent clade
# G.predecessors(clade) returns iterator, thus next() necessary
# despite the fact that there should only be one parent
parent_clade = next(G.predecessors(clade))
......@@ -99,7 +99,7 @@ def read_in_clade_definitions(clade_file):
for _, row in df[(df.clade == clade) & (df.gene != 'clade')].iterrows():
# Overwrite of parent alleles is possible and happens here
clades[clade][(row.gene, int(row.site)-1)] = row.alt
# Convert items from dict[str, dict[(str,int),str]] to dict[str, list[(str,int,str)]]
clades = {
clade: [
......@@ -110,7 +110,7 @@ def read_in_clade_definitions(clade_file):
# If clause prevents the root (helper) node from being emitted
if clade != root
}
return clades
......
......@@ -177,6 +177,7 @@ def read_distance_map(map_file):
dict :
Python representation of the distance map JSON
>>> sorted(read_distance_map("tests/data/distance_map_weight_per_site.json").items())
[('default', 0), ('map', {'HA1': {144: 1}})]
>>> sorted(read_distance_map("tests/data/distance_map_weight_per_site_and_sequence.json").items())
......@@ -236,6 +237,7 @@ def get_distance_between_nodes(node_a_sequences, node_b_sequences, distance_map,
float :
distance between node sequences based on the given map
>>> node_a_sequences = {"gene": "ACTG"}
>>> node_b_sequences = {"gene": "ACGG"}
>>> distance_map = {"default": 0, "map": {}}
......
......@@ -9,7 +9,9 @@ import warnings
import numbers
import re
from Bio import Phylo
-from .utils import read_metadata, read_node_data, write_json, read_config, read_lat_longs, read_colors
+from .io import read_metadata
+from .utils import read_node_data, write_json, read_config, read_lat_longs, read_colors
from .validate import export_v2 as validate_v2, auspice_config_v2 as validate_auspice_config_v2, ValidateError
# Set up warnings & exceptions
......@@ -565,6 +567,7 @@ def set_data_provenance(data_json, config):
config : dict
config JSON with an expected ``data_provenance`` key
>>> config = {"data_provenance": [{"name": "GISAID"}, {"name": "INSDC"}]}
>>> data_json = {"meta": {}}
>>> set_data_provenance(data_json, config)
......@@ -992,10 +995,16 @@ def run_v2(args):
    if args.metadata is not None:
        try:
-            metadata_file, _ = read_metadata(args.metadata)
+            metadata_file = read_metadata(args.metadata).to_dict(orient="index")
+            for strain in metadata_file.keys():
+                if "strain" not in metadata_file[strain]:
+                    metadata_file[strain]["strain"] = strain
        except FileNotFoundError:
-            print(f"ERROR: meta data file ({args.metadata}) does not exist")
+            print(f"ERROR: meta data file ({args.metadata}) does not exist", file=sys.stderr)
            sys.exit(2)
+        except Exception as error:
+            print(f"ERROR: {error}", file=sys.stderr)
+            sys.exit(1)
    else:
        metadata_file = {}
......
......@@ -92,6 +92,7 @@ def filter_by_exclude_all(metadata):
set[str]:
Empty set of strains
>>> metadata = pd.DataFrame([{"region": "Africa"}, {"region": "Europe"}], index=["strain1", "strain2"])
>>> filter_by_exclude_all(metadata)
set()
......@@ -114,6 +115,7 @@ def filter_by_exclude(metadata, exclude_file):
set[str]:
Strains that pass the filter
>>> metadata = pd.DataFrame([{"region": "Africa"}, {"region": "Europe"}], index=["strain1", "strain2"])
>>> with NamedTemporaryFile(delete=False) as exclude_file:
... characters_written = exclude_file.write(b'strain1')
......@@ -143,6 +145,7 @@ def parse_filter_query(query):
str :
Value of column to query
>>> parse_filter_query("property=value")
('property', <built-in function eq>, 'value')
>>> parse_filter_query("property!=value")
......@@ -177,6 +180,7 @@ def filter_by_exclude_where(metadata, exclude_where):
set[str]:
Strains that pass the filter
>>> metadata = pd.DataFrame([{"region": "Africa"}, {"region": "Europe"}], index=["strain1", "strain2"])
>>> filter_by_exclude_where(metadata, "region!=Europe")
{'strain2'}
......@@ -228,6 +232,7 @@ def filter_by_query(metadata, query):
set[str]:
Strains that pass the filter
>>> metadata = pd.DataFrame([{"region": "Africa"}, {"region": "Europe"}], index=["strain1", "strain2"])
>>> filter_by_query(metadata, "region == 'Africa'")
{'strain1'}
......@@ -256,6 +261,7 @@ def filter_by_ambiguous_date(metadata, date_column="date", ambiguity="any"):
set[str]:
Strains that pass the filter
>>> metadata = pd.DataFrame([{"region": "Africa", "date": "2020-01-XX"}, {"region": "Europe", "date": "2020-01-02"}], index=["strain1", "strain2"])
>>> filter_by_ambiguous_date(metadata)
{'strain2'}
......@@ -298,6 +304,7 @@ def filter_by_date(metadata, date_column="date", min_date=None, max_date=None):
set[str]:
Strains that pass the filter
>>> metadata = pd.DataFrame([{"region": "Africa", "date": "2020-01-01"}, {"region": "Europe", "date": "2020-01-02"}], index=["strain1", "strain2"])
>>> filter_by_date(metadata, min_date=numeric_date("2020-01-02"))
{'strain2'}
......@@ -352,6 +359,7 @@ def filter_by_sequence_index(metadata, sequence_index):
set[str]:
Strains that pass the filter
>>> metadata = pd.DataFrame([{"region": "Africa", "date": "2020-01-01"}, {"region": "Europe", "date": "2020-01-02"}], index=["strain1", "strain2"])
>>> sequence_index = pd.DataFrame([{"strain": "strain1", "ACGT": 28000}]).set_index("strain")
>>> filter_by_sequence_index(metadata, sequence_index)
......@@ -381,6 +389,7 @@ def filter_by_sequence_length(metadata, sequence_index, min_length=0):
set[str]:
Strains that pass the filter
>>> metadata = pd.DataFrame([{"region": "Africa", "date": "2020-01-01"}, {"region": "Europe", "date": "2020-01-02"}], index=["strain1", "strain2"])
>>> sequence_index = pd.DataFrame([{"strain": "strain1", "A": 7000, "C": 7000, "G": 7000, "T": 7000}, {"strain": "strain2", "A": 6500, "C": 6500, "G": 6500, "T": 6500}]).set_index("strain")
>>> filter_by_sequence_length(metadata, sequence_index, min_length=27000)
......@@ -417,6 +426,7 @@ def filter_by_non_nucleotide(metadata, sequence_index):
set[str]:
Strains that pass the filter
>>> metadata = pd.DataFrame([{"region": "Africa", "date": "2020-01-01"}, {"region": "Europe", "date": "2020-01-02"}], index=["strain1", "strain2"])
>>> sequence_index = pd.DataFrame([{"strain": "strain1", "invalid_nucleotides": 0}, {"strain": "strain2", "invalid_nucleotides": 1}]).set_index("strain")
>>> filter_by_non_nucleotide(metadata, sequence_index)
......@@ -447,6 +457,7 @@ def include(metadata, include_file):
set[str]:
Strains that pass the filter
>>> metadata = pd.DataFrame([{"region": "Africa"}, {"region": "Europe"}], index=["strain1", "strain2"])
>>> with NamedTemporaryFile(delete=False) as include_file:
... characters_written = include_file.write(b'strain1')
......@@ -479,6 +490,7 @@ def include_by_include_where(metadata, include_where):
set[str]:
Strains that pass the filter
>>> metadata = pd.DataFrame([{"region": "Africa"}, {"region": "Europe"}], index=["strain1", "strain2"])
>>> include_by_include_where(metadata, "region!=Europe")
{'strain1'}
......@@ -664,6 +676,7 @@ def filter_kwargs_to_str(kwargs):
str :
String representation of the kwargs for reporting.
>>> sequence_index = pd.DataFrame([{"strain": "strain1", "ACGT": 28000}, {"strain": "strain2", "ACGT": 26000}, {"strain": "strain3", "ACGT": 5000}]).set_index("strain")
>>> exclude_by = [(filter_by_sequence_length, {"sequence_index": sequence_index, "min_length": 27000})]
>>> filter_kwargs_to_str(exclude_by[0][1])
......@@ -718,9 +731,11 @@ def apply_filters(metadata, exclude_by, include_by):
list[dict] :
Strains to force-include along with the function that filtered them and the arguments used to run the function.
For example, filter data by minimum date, but force the inclusion of strains
from Africa.
>>> metadata = pd.DataFrame([{"region": "Africa", "date": "2020-01-01"}, {"region": "Europe", "date": "2020-10-02"}, {"region": "North America", "date": "2020-01-01"}], index=["strain1", "strain2", "strain3"])
>>> exclude_by = [(filter_by_date, {"min_date": numeric_date("2020-04-01")})]
>>> include_by = [(include_by_include_where, {"include_where": "region=Africa"})]
......@@ -825,6 +840,7 @@ def get_groups_for_subsampling(strains, metadata, group_by=None):
list :
A list of dictionaries with strains that were skipped from grouping and the reason why (see also: `apply_filters` output).
>>> strains = ["strain1", "strain2"]
>>> metadata = pd.DataFrame([{"strain": "strain1", "date": "2020-01-01", "region": "Africa"}, {"strain": "strain2", "date": "2020-02-01", "region": "Europe"}]).set_index("strain")
>>> group_by = ["region"]
......
......@@ -58,6 +58,7 @@ def read_metadata(metadata_file, id_columns=("strain", "name"), chunk_size=None)
KeyError :
When the metadata file does not have any valid index columns.
For standard use, request a metadata file and get a pandas DataFrame.
>>> read_metadata("tests/functional/filter/metadata.tsv").index.values[0]
......@@ -91,11 +92,13 @@ def read_metadata(metadata_file, id_columns=("strain", "name"), chunk_size=None)
kwargs["chunksize"] = chunk_size
# Inspect the first chunk of the metadata, to find any valid index columns.
-    chunk = pd.read_csv(
+    metadata = pd.read_csv(
        metadata_file,
        iterator=True,
        **kwargs,
-    ).read(nrows=1)
+    )
+    chunk = metadata.read(nrows=1)
+    metadata.close()
id_columns_present = [
id_column
......
......@@ -35,6 +35,7 @@ class TiterCollection(object):
tuple (dict, list, list)
tuple of a dict of titer measurements, list of strains, list of sources
>>> measurements, strains, sources = TiterCollection.load_from_file("tests/data/titer_model/h3n2_titers_subset.tsv")
>>> type(measurements)
<class 'dict'>
......@@ -107,6 +108,7 @@ class TiterCollection(object):
dict
number of measurements per strain
>>> measurements, strains, sources = TiterCollection.load_from_file("tests/data/titer_model/h3n2_titers_subset.tsv")
>>> titer_counts = TiterCollection.count_strains(measurements)
>>> titer_counts["A/Acores/11/2013"]
......@@ -142,6 +144,7 @@ class TiterCollection(object):
reduced dictionary of titer measurements containing only those where
test and reference virus are part of the strain list
>>> measurements, strains, sources = TiterCollection.load_from_file("tests/data/titer_model/h3n2_titers_subset.tsv")
>>> len(measurements)
11
......
......@@ -59,6 +59,7 @@ def check_conflicting_args(tree_builder_args, defaults):
ConflictingArgumentsException
When any user-provided arguments match those in the defaults.
>>> defaults = ("-nt", "-m", "-s")
>>> check_conflicting_args("-czb -n 2", defaults)
>>> check_conflicting_args("-czb -nt 2", defaults)
......
......@@ -551,7 +551,7 @@ def annotate_parents_for_tree(tree):
return tree
-def json_to_tree(json_dict, root=True):
+def json_to_tree(json_dict, root=True, parent_cumulative_branch_length=None):
"""Returns a Bio.Phylo tree corresponding to the given JSON dictionary exported
by `tree_to_json`.
......@@ -589,6 +589,17 @@ def json_to_tree(json_dict, root=True):
True
>>> tree.clades[0].branch_length > 0
True
Branch lengths should be the length of the branch to each node and not the
length from the root. The cumulative branch length from the root gets its
own attribute.
>>> tip = [tip for tip in tree.find_clades(terminal=True) if tip.name == "USA/2016/FLWB042"][0]
>>> round(tip.cumulative_branch_length, 6)
0.004747
>>> round(tip.branch_length, 6)
0.000186
"""
# Check for v2 JSON which has combined metadata and tree data.
if root and "meta" in json_dict and "tree" in json_dict:
......@@ -602,10 +613,6 @@ def json_to_tree(json_dict, root=True):
else:
node.name = json_dict["strain"]
if "children" in json_dict:
# Recursively add children to the current node.
node.clades = [json_to_tree(child, root=False) for child in json_dict["children"]]
# Assign all non-children attributes.
for attr, value in json_dict.items():
if attr != "children":
......@@ -614,12 +621,27 @@ def json_to_tree(json_dict, root=True):
# Only v1 JSONs support a single `attr` attribute.
if hasattr(node, "attr"):
node.numdate = node.attr.get("num_date")
-        node.branch_length = node.attr.get("div")
+        node.cumulative_branch_length = node.attr.get("div")
if "translations" in node.attr:
node.translations = node.attr["translations"]
elif hasattr(node, "node_attrs"):
-        node.branch_length = node.node_attrs.get("div")
+        node.cumulative_branch_length = node.node_attrs.get("div")
+    node.branch_length = 0.0
+    if parent_cumulative_branch_length is not None and hasattr(node, "cumulative_branch_length"):
+        node.branch_length = node.cumulative_branch_length - parent_cumulative_branch_length
+
+    if "children" in json_dict:
+        # Recursively add children to the current node.
+        node.clades = [
+            json_to_tree(
+                child,
+                root=False,
+                parent_cumulative_branch_length=node.cumulative_branch_length
+            )
+            for child in json_dict["children"]
+        ]
if root:
node = annotate_parents_for_tree(node)
......@@ -646,8 +668,8 @@ def read_bed_file(bed_file):
bed_file : str
Path to the BED file
-    Returns:
-    --------
+    Returns
+    -------
list[int]:
Sorted list of unique zero-indexed sites
"""
......@@ -677,8 +699,8 @@ def read_mask_file(mask_file):
mask_file : str
Path to the masking file
-    Returns:
-    --------
+    Returns
+    -------
list[int]:
Sorted list of unique zero-indexed sites
"""
......@@ -726,7 +748,7 @@ def read_strains(*files, comment_char="#"):
set of distinct strains.
Strain names can be commented with full-line or inline comments. For
-    example, the following is a valid strain names file:
+    example, the following is a valid strain names file::
# this is a comment at the top of the file
strain1 # exclude strain1 because it isn't sequenced properly
......
augur (15.0.2-1) unstable; urgency=medium

  * Team upload.
  * New upstream version
  * Standards-Version: 4.6.1 (routine-update)

 -- Étienne Mollier <emollier@debian.org>  Sun, 15 May 2022 15:16:15 +0200

augur (15.0.1-1) unstable; urgency=medium

  * New upstream version
......
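Stanzas like the one above follow the standard Debian changelog format and are typically produced with devscripts' `dch`; a hedged sketch, which may differ from the maintainer's actual workflow:

```bash
# Open a new changelog entry for the new upstream version...
dch --newversion 15.0.2-1 --distribution unstable "New upstream version"
# ...and append further items before finalizing the entry.
dch --append "Standards-Version: 4.6.1 (routine-update)"
```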
......@@ -21,7 +21,7 @@ Build-Depends: debhelper-compat (= 13),
python3-isodate <!nocheck>,
vcftools <!nocheck>,
python3-xopen
-Standards-Version: 4.6.0
+Standards-Version: 4.6.1
Vcs-Browser: https://salsa.debian.org/med-team/augur
Vcs-Git: https://salsa.debian.org/med-team/augur.git
Homepage: https://github.com/nextstrain/augur
......
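A Standards-Version bump like this one is routinely verified by rebuilding the package and running lintian; a sketch, with the artifact name assumed:

```bash
# Rebuild and check the result against the declared Policy version.
dpkg-buildpackage -us -uc
lintian --info ../augur_15.0.2-1_amd64.changes
```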
......@@ -159,9 +159,8 @@ merge-to-release-branch() {
}
build-dist() {
-    rm -rfv dist augur.egg-info
-    python3 setup.py clean
-    python3 setup.py sdist bdist_wheel
+    rm -rfv build/ dist/ nextstrain_augur.egg-info
+    python3 -m build
}
remind-to-push() {
......
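`python3 -m build` in the revised `build-dist` above is the PEP 517 front end that replaces the deprecated `setup.py sdist bdist_wheel` invocation. A quick sanity check of its output (file names illustrative):

```bash
# Both distribution formats should land in dist/.
python3 -m build
ls dist/
# nextstrain-augur-15.0.2.tar.gz  nextstrain_augur-15.0.2-py3-none-any.whl
python3 -m twine check dist/*   # confirm the metadata will render on PyPI
```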
augur.io module
===============
.. automodule:: augur.io
:members:
:undoc-members:
:show-inheritance:
......@@ -32,6 +32,7 @@ Submodules
augur.import
augur.import_beast
augur.index
augur.io
augur.lbi
augur.mask
augur.parse
......
......@@ -30,9 +30,6 @@ Please see the [project board](https://github.com/orgs/nextstrain/projects/6) fo
We currently target compatibility with Python 3.7 and higher. As Python releases new versions,
the minimum target compatibility may be increased in the future.
-Versions for this project, Augur, from 3.0.0 onwards aim to follow the
-[Semantic Versioning rules](https://semver.org).
### Running local changes
While you are making code changes, you will want to run augur to see its behavior with those changes.
......@@ -158,27 +155,64 @@ We use [codecov](https://codecov.io/) to automatically produce test coverage for
### Releasing

-Before you create a new release, run all tests from a fresh conda environment to verify that nothing has broken since the last CI build on GitHub.
-The following commands will setup the equivalent conda environment to the GitHub Actions environment, run unit and integration tests, and deactivate the environment.
+Versions for this project, Augur, from 3.0.0 onwards aim to follow the
+[Semantic Versioning rules](https://semver.org).

+#### Steps

-```bash
-# Update Conda.
-conda activate base
-conda update conda
+##### 1. Gather PRs and draft release notes

-# Create an Augur environment.
-conda create -n augur -c conda-forge -c bioconda augur
-conda activate augur
-python3 -m pip install -e .[dev]
+1. Compare changes to find PRs and direct commits since the previous tag (e.g. https://github.com/nextstrain/augur/compare/14.1.0...15.0.0, replacing `14.1.0` with the previous tag and `15.0.0` with `master`). A command-line shortcut is sketched after this section.
+2. Add the PRs to the open GitHub milestone.
+3. Define a new version number `X.X.X` based on the changes and Semantic Versioning rules.
+4. Rename the milestone as `<Major|Feature|Patch> release X.X.X`.
+5. Draft changes in the milestone description using Markdown. Keep headers and formatting consistent with [CHANGES.md](../../CHANGES.md).

-# Run tests.
-./run_tests.sh
-bash tests/builds/runner.sh
+##### 2. Update change log

-# Clean up.
-conda deactivate
-conda env remove -n augur
-```
+1. Visit [this link](https://github.com/nextstrain/augur/edit/master/CHANGES.md) to open `CHANGES.md` for edit.
+2. Add the milestone description under the `__NEXT__` header.
+3. At the bottom of the page:
+   1. Title: `Update change log for X.X.X`
+   2. Description: leave empty
+   3. Select the option **Commit directly to the `master` branch.**
+   4. Select **Commit changes**.

+##### 3. Run build/test/release scripts

+1. Go to [this GitHub Actions workflow](https://github.com/nextstrain/augur/actions/workflows/release.yaml).
+2. Select **Run workflow**. In the new menu:
+   1. Ensure the `master` branch is selected.
+   2. In **New version X.X.X**, provide the new version number.
+   3. Set **PyPI instance for publishing** to *PyPI* (default) or *TestPyPI*. [More info](https://packaging.python.org/en/latest/guides/using-testpypi)
+   4. Select **Run workflow**.
+3. Ensure the workflow runs successfully.

+##### 4. Update GitHub milestones

+1. Close the current release milestone.
+2. Create a new milestone named `Next release X.X.X`.

+##### 5. Update on Bioconda

+For versions without dependency changes:

+1. Wait for an auto-bump PR in [bioconda-recipes][].
+2. Add a comment `@BiocondaBot please add label`.
+3. Wait for a bioconda maintainer to approve and merge.

+For versions with dependency changes:

+1. Create a new PR in [bioconda-recipes][] following the instructions in [nextstrain/bioconda-recipes/README.md](https://github.com/nextstrain/bioconda-recipes/blob/readme/README.md).
+   - [Example](https://github.com/bioconda/bioconda-recipes/pull/34344)
+2. Add a comment `@BiocondaBot please add label`.
+3. Wait for a bioconda maintainer to approve and merge.
+4. Wait for an auto-bump PR in [bioconda-recipes][].
+5. Add a comment in the auto-bump PR: `Please close this in favor of #<your PR number>`.

+[bioconda-recipes]: https://github.com/bioconda/bioconda-recipes

+#### Notes

New releases are tagged in git using an "annotated" tag. If the git option
`user.signingKey` is set, the tag will also be [signed][]. Signed tags are
......
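A command-line shortcut for step 1 of the release steps above, equivalent to opening the compare URL (previous tag assumed to be `14.1.0`):

```bash
# List everything merged since the previous release tag; PR numbers appear
# in merge/squash commit subjects.
git fetch --tags origin
git log --oneline 14.1.0..origin/master
```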
......@@ -65,3 +65,49 @@ Export with auspice config JSON with an extensions block
$ python3 "$TESTDIR/../../scripts/diff_jsons.py" export_v2/dataset2.json "$TMP/dataset3.json" \
> --exclude-paths "root['meta']['updated']"
{}
Run export with metadata using the default id column of "strain".
$ ${AUGUR} export v2 \
> --tree export_v2/tree.nwk \
> --metadata export_v2/dataset1_metadata_with_strain.tsv \
> --node-data export_v2/div_node-data.json export_v2/location_node-data.json \
> --auspice-config export_v2/auspice_config1.json \
> --maintainers "Nextstrain Team" \
> --output "$TMP/dataset1.json" > /dev/null
$ python3 "$TESTDIR/../../scripts/diff_jsons.py" export_v2/dataset1.json "$TMP/dataset1.json" \
> --exclude-paths "root['meta']['updated']" "root['meta']['maintainers']"
{}
$ rm -f "$TMP/dataset1.json"
Run export with metadata that uses an id column other than "strain".
In this case, the column is "name" (one of the default id columns expected by Augur's `io.read_metadata` function).
$ ${AUGUR} export v2 \
> --tree export_v2/tree.nwk \
> --metadata export_v2/dataset1_metadata_with_name.tsv \
> --node-data export_v2/div_node-data.json export_v2/location_node-data.json \
> --auspice-config export_v2/auspice_config1.json \
> --maintainers "Nextstrain Team" \
> --output "$TMP/dataset1.json" > /dev/null
$ python3 "$TESTDIR/../../scripts/diff_jsons.py" export_v2/dataset1.json "$TMP/dataset1.json" \
> --exclude-paths "root['meta']['updated']" "root['meta']['maintainers']"
{}
$ rm -f "$TMP/dataset1.json"
Run export with metadata that uses an invalid id column.
This should fail with a helpful error message.
$ ${AUGUR} export v2 \
> --tree export_v2/tree.nwk \
> --metadata export_v2/dataset1_metadata_without_valid_id.tsv \
> --node-data export_v2/div_node-data.json export_v2/location_node-data.json \
> --auspice-config export_v2/auspice_config1.json \
> --maintainers "Nextstrain Team" \
> --output "$TMP/dataset1.json" > /dev/null
ERROR: None of the possible id columns (('strain', 'name')) were found in the metadata's columns ('invalid_id', 'div', 'mutation_length')
[1]
$ popd > /dev/null
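These functional tests are in cram format (`$` commands followed by expected output, with `[1]` asserting the exit status). A sketch of running just this file locally, with the path assumed from the repository layout:

```bash
# CI runs the whole suite via run_tests.sh; a single file can be run directly.
python3 -m pip install cram
cram tests/functional/export_v2.t
```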