From 916236e68aa865c9c15c56ca1b325d1d065e3d85 Mon Sep 17 00:00:00 2001 From: Geovanna Maciel <geoojv@gmail.com> Date: Thu, 30 Jan 2025 09:59:06 -0300 Subject: [PATCH 1/3] New upstream version 3.1.44 --- .flake8 | 38 - .github/dependabot.yml | 7 +- .github/workflows/alpine-test.yml | 72 + .github/workflows/codeql.yml | 80 ++ .github/workflows/cygwin-test.yml | 62 +- .github/workflows/lint.yml | 12 +- .github/workflows/pythonpackage.yml | 71 +- .gitignore | 51 +- .pre-commit-config.yaml | 53 +- .readthedocs.yaml | 36 + AUTHORS | 3 + CONTRIBUTING.md | 5 + FUNDING.json | 7 + Makefile | 2 +- README.md | 170 +-- SECURITY.md | 6 +- VERSION | 2 +- build-release.sh | 18 +- check-version.sh | 29 +- doc/Makefile | 43 +- doc/requirements.txt | 2 +- doc/source/changes.rst | 45 + doc/source/conf.py | 95 +- doc/source/index.rst | 1 - doc/source/intro.rst | 1 - doc/source/reference.rst | 63 +- doc/source/roadmap.rst | 1 - doc/source/tutorial.rst | 3 +- fuzzing/LICENSE-APACHE | 201 +++ fuzzing/LICENSE-BSD | 1 + fuzzing/README.md | 226 +++ fuzzing/fuzz-targets/fuzz_blob.py | 40 + fuzzing/fuzz-targets/fuzz_config.py | 57 + fuzzing/fuzz-targets/fuzz_diff.py | 86 ++ fuzzing/fuzz-targets/fuzz_repo.py | 47 + fuzzing/fuzz-targets/fuzz_submodule.py | 100 ++ fuzzing/fuzz-targets/utils.py | 122 ++ fuzzing/local-dev-helpers/Dockerfile | 22 + fuzzing/oss-fuzz-scripts/build.sh | 19 + .../container-environment-bootstrap.sh | 108 ++ git/__init__.py | 310 +++- git/cmd.py | 1263 ++++++++++------- git/compat.py | 127 +- git/config.py | 467 +++--- git/db.py | 48 +- git/diff.py | 394 +++-- git/exc.py | 104 +- git/index/__init__.py | 20 +- git/index/base.py | 998 +++++++------ git/index/fun.py | 290 ++-- git/index/typ.py | 81 +- git/index/util.py | 66 +- git/objects/__init__.py | 43 +- git/objects/base.py | 215 ++- git/objects/blob.py | 32 +- git/objects/commit.py | 457 +++--- git/objects/fun.py | 162 ++- git/objects/submodule/__init__.py | 9 +- git/objects/submodule/base.py | 1020 +++++++------ 
git/objects/submodule/root.py | 237 ++-- git/objects/submodule/util.py | 65 +- git/objects/tag.py | 89 +- git/objects/tree.py | 276 ++-- git/objects/util.py | 429 +++--- git/refs/__init__.py | 28 +- git/refs/head.py | 167 ++- git/refs/log.py | 195 ++- git/refs/reference.py | 126 +- git/refs/remote.py | 48 +- git/refs/symbolic.py | 503 ++++--- git/refs/tag.py | 87 +- git/remote.py | 641 +++++---- git/repo/__init__.py | 11 +- git/repo/base.py | 901 +++++++----- git/repo/fun.py | 227 +-- git/types.py | 238 +++- git/util.py | 755 ++++++---- init-tests-after-clone.sh | 72 +- pyproject.toml | 71 +- requirements-dev.txt | 7 +- setup.py | 32 +- test-requirements.txt | 9 +- test/__init__.py | 5 +- test/deprecation/__init__.py | 19 + test/deprecation/lib.py | 27 + test/deprecation/test_basic.py | 137 ++ test/deprecation/test_cmd_git.py | 391 +++++ test/deprecation/test_compat.py | 84 ++ test/deprecation/test_toplevel.py | 233 +++ test/deprecation/test_types.py | 69 + test/fixtures/diff_numstat | 5 +- test/fixtures/env_case.py | 2 +- test/fixtures/polyglot | 8 + test/lib/__init__.py | 9 +- test/lib/helper.py | 231 +-- test/performance/__init__.py | 2 + test/performance/lib.py | 53 +- test/performance/test_commit.py | 33 +- test/performance/test_odb.py | 14 +- test/performance/test_streams.py | 53 +- test/test_actor.py | 10 +- test/test_base.py | 61 +- test/test_blob.py | 8 +- test/test_blob_filter.py | 23 +- test/test_clone.py | 14 +- test/test_commit.py | 131 +- test/test_config.py | 90 +- test/test_db.py | 15 +- test/test_diff.py | 189 ++- test/test_docs.py | 302 ++-- test/test_exc.py | 32 +- test/test_fun.py | 92 +- test/test_git.py | 651 +++++++-- test/test_imports.py | 32 + test/test_index.py | 588 +++++--- test/test_installation.py | 65 +- test/test_quick_doc.py | 43 +- test/test_reflog.py | 45 +- test/test_refs.py | 218 +-- test/test_remote.py | 304 ++-- test/test_repo.py | 325 +++-- test/test_stats.py | 20 +- test/test_submodule.py | 467 +++--- test/test_tree.py | 116 
+- test/test_util.py | 524 +++++-- test/tstrunner.py | 5 + tox.ini | 49 +- 127 files changed, 12649 insertions(+), 6472 deletions(-) delete mode 100644 .flake8 create mode 100644 .github/workflows/alpine-test.yml create mode 100644 .github/workflows/codeql.yml create mode 100644 .readthedocs.yaml create mode 100644 FUNDING.json create mode 100644 fuzzing/LICENSE-APACHE create mode 120000 fuzzing/LICENSE-BSD create mode 100644 fuzzing/README.md create mode 100644 fuzzing/fuzz-targets/fuzz_blob.py create mode 100644 fuzzing/fuzz-targets/fuzz_config.py create mode 100644 fuzzing/fuzz-targets/fuzz_diff.py create mode 100644 fuzzing/fuzz-targets/fuzz_repo.py create mode 100644 fuzzing/fuzz-targets/fuzz_submodule.py create mode 100644 fuzzing/fuzz-targets/utils.py create mode 100644 fuzzing/local-dev-helpers/Dockerfile create mode 100644 fuzzing/oss-fuzz-scripts/build.sh create mode 100755 fuzzing/oss-fuzz-scripts/container-environment-bootstrap.sh create mode 100644 test/deprecation/__init__.py create mode 100644 test/deprecation/lib.py create mode 100644 test/deprecation/test_basic.py create mode 100644 test/deprecation/test_cmd_git.py create mode 100644 test/deprecation/test_compat.py create mode 100644 test/deprecation/test_toplevel.py create mode 100644 test/deprecation/test_types.py create mode 100755 test/fixtures/polyglot create mode 100644 test/test_imports.py diff --git a/.flake8 b/.flake8 deleted file mode 100644 index ed5d036..0000000 --- a/.flake8 +++ /dev/null @@ -1,38 +0,0 @@ -[flake8] -show-source = True -count= True -statistics = True -# E265 = comment blocks like @{ section, which it can't handle -# E266 = too many leading '#' for block comment -# E731 = do not assign a lambda expression, use a def -# W293 = Blank line contains whitespace -# W504 = Line break after operator -# E704 = multiple statements in one line - used for @override -# TC002 = move third party import to TYPE_CHECKING -# ANN = flake8-annotations -# TC, TC2 = flake8-type-checking -# D 
= flake8-docstrings - -# select = C,E,F,W ANN, TC, TC2 # to enable code. Disabled if not listed, including builtin codes -enable-extensions = TC, TC2 # only needed for extensions not enabled by default - -ignore = E265,E266,E731,E704, - W293, W504, - ANN0 ANN1 ANN2, - TC002, - TC0, TC1, TC2 - # B, - A, - D, - RST, RST3 - -exclude = .tox,.venv,build,dist,doc,git/ext/ - -rst-roles = # for flake8-RST-docstrings - attr,class,func,meth,mod,obj,ref,term,var # used by sphinx - -min-python-version = 3.7.0 - -# for `black` compatibility -max-line-length = 120 -extend-ignore = E203,W503 diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 203f3c8..2fe73ca 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -3,4 +3,9 @@ updates: - package-ecosystem: "github-actions" directory: "/" schedule: - interval: "weekly" + interval: "weekly" + +- package-ecosystem: "gitsubmodule" + directory: "/" + schedule: + interval: "weekly" diff --git a/.github/workflows/alpine-test.yml b/.github/workflows/alpine-test.yml new file mode 100644 index 0000000..2c1eed3 --- /dev/null +++ b/.github/workflows/alpine-test.yml @@ -0,0 +1,72 @@ +name: test-alpine + +on: [push, pull_request, workflow_dispatch] + +jobs: + build: + runs-on: ubuntu-latest + + container: + image: alpine:latest + + defaults: + run: + shell: sudo -u runner sh -exo pipefail {0} + + steps: + - name: Prepare Alpine Linux + run: | + apk add sudo git git-daemon python3 py3-pip + echo 'Defaults env_keep += "CI GITHUB_* RUNNER_*"' >/etc/sudoers.d/ci_env + addgroup -g 127 docker + adduser -D -u 1001 runner + adduser runner docker + shell: sh -exo pipefail {0} # Run this as root, not the "runner" user. + + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Set workspace ownership + run: | + chown -R runner:docker -- "$GITHUB_WORKSPACE" + shell: sh -exo pipefail {0} # Run this as root, not the "runner" user. 
+ + - name: Prepare this repo for tests + run: | + ./init-tests-after-clone.sh + + - name: Set git user identity and command aliases for the tests + run: | + git config --global user.email "travis@ci.com" + git config --global user.name "Travis Runner" + # If we rewrite the user's config by accident, we will mess it up + # and cause subsequent tests to fail + cat test/fixtures/.gitconfig >> ~/.gitconfig + + - name: Set up virtualenv + run: | + python -m venv .venv + . .venv/bin/activate + printf '%s=%s\n' 'PATH' "$PATH" 'VIRTUAL_ENV' "$VIRTUAL_ENV" >>"$GITHUB_ENV" + + - name: Update PyPA packages + run: | + # Get the latest pip, wheel, and prior to Python 3.12, setuptools. + python -m pip install -U pip $(pip freeze --all | grep -ow ^setuptools) wheel + + - name: Install project and test dependencies + run: | + pip install ".[test]" + + - name: Show version and platform information + run: | + uname -a + command -v git python + git version + python --version + python -c 'import os, sys; print(f"sys.platform={sys.platform!r}, os.name={os.name!r}")' + + - name: Test with pytest + run: | + pytest --color=yes -p no:sugar --instafail -vv diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml new file mode 100644 index 0000000..ae52418 --- /dev/null +++ b/.github/workflows/codeql.yml @@ -0,0 +1,80 @@ +# For most projects, this workflow file will not need changing; you simply need +# to commit it to your repository. +# +# You may wish to alter this file to override the set of languages analyzed, +# or to provide custom queries or build logic. +# +# ******** NOTE ******** +# We have attempted to detect the languages in your repository. Please check +# the `language` matrix defined below to confirm you have the correct set of +# supported CodeQL languages. +# +name: "CodeQL" + +on: + push: + pull_request: + schedule: + - cron: '27 10 * * 3' + +jobs: + analyze: + name: Analyze + # Runner size impacts CodeQL analysis time. 
To learn more, please see: + # - https://gh.io/recommended-hardware-resources-for-running-codeql + # - https://gh.io/supported-runners-and-hardware-resources + # - https://gh.io/using-larger-runners + # Consider using larger runners for possible analysis time improvements. + runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-latest' }} + timeout-minutes: ${{ (matrix.language == 'swift' && 120) || 360 }} + permissions: + actions: read + contents: read + security-events: write + + strategy: + fail-fast: false + matrix: + language: [ 'python' ] + # CodeQL supports [ 'c-cpp', 'csharp', 'go', 'java-kotlin', 'javascript-typescript', 'python', 'ruby', 'swift' ] + # Use only 'java-kotlin' to analyze code written in Java, Kotlin or both + # Use only 'javascript-typescript' to analyze code written in JavaScript, TypeScript or both + # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + # Initializes the CodeQL tools for scanning. + - name: Initialize CodeQL + uses: github/codeql-action/init@v3 + with: + languages: ${{ matrix.language }} + setup-python-dependencies: false + # If you wish to specify custom queries, you can do so here or in a config file. + # By default, queries listed here will override any specified in a config file. + # Prefix the list here with "+" to use these queries and those in the config file. + + # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs + # queries: security-extended,security-and-quality + + + # Autobuild attempts to build any compiled languages (C/C++, C#, Go, Java, or Swift). 
+ # If this step fails, then you should remove it and run the build manually (see below) + - name: Autobuild + uses: github/codeql-action/autobuild@v3 + + # ℹ️ Command-line programs to run using the OS shell. + # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun + + # If the Autobuild fails above, remove it and uncomment the following three lines. + # modify them (or add more) to build your code if your project, please refer to the EXAMPLE below for guidance. + + # - run: | + # echo "Run, Build Application using script" + # ./location_of_script_within_repo/buildscript.sh + + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@v3 + with: + category: "/language:${{matrix.language}}" diff --git a/.github/workflows/cygwin-test.yml b/.github/workflows/cygwin-test.yml index 962791a..bde4ea6 100644 --- a/.github/workflows/cygwin-test.yml +++ b/.github/workflows/cygwin-test.yml @@ -5,60 +5,78 @@ on: [push, pull_request, workflow_dispatch] jobs: build: runs-on: windows-latest + strategy: fail-fast: false + env: - CHERE_INVOKING: 1 - SHELLOPTS: igncr - TMP: "/tmp" - TEMP: "/tmp" + CHERE_INVOKING: "1" + CYGWIN_NOWINPATH: "1" + defaults: run: - shell: bash.exe --noprofile --norc -exo pipefail -o igncr "{0}" + shell: C:\tools\cygwin\bin\bash.exe --login --norc -eo pipefail -o igncr "{0}" steps: - name: Force LF line endings - run: git config --global core.autocrlf input + run: | + git config --global core.autocrlf false # Affects the non-Cygwin git. + shell: bash # Use Git Bash instead of Cygwin Bash for this step. 
- uses: actions/checkout@v4 with: fetch-depth: 0 - submodules: recursive - - uses: cygwin/cygwin-install-action@v4 + - name: Set up Cygwin + uses: egor-tensin/setup-cygwin@v4 with: - packages: python39 python39-pip python39-virtualenv git + packages: python39=3.9.16-1 python39-pip python39-virtualenv git - - name: Show python and git versions + - name: Arrange for verbose output run: | - /usr/bin/python --version - /usr/bin/git version + # Arrange for verbose output but without shell environment setup details. + echo 'set -x' >~/.bash_profile - - name: Tell git to trust this repo + - name: Special configuration for Cygwin git run: | - /usr/bin/git config --global --add safe.directory "$(pwd)" + git config --global --add safe.directory "$(pwd)" + git config --global --add safe.directory "$(pwd)/.git" + git config --global core.autocrlf false - name: Prepare this repo for tests run: | - TRAVIS=yes ./init-tests-after-clone.sh + ./init-tests-after-clone.sh - - name: Further prepare git configuration for tests + - name: Set git user identity and command aliases for the tests run: | - /usr/bin/git config --global user.email "travis@ci.com" - /usr/bin/git config --global user.name "Travis Runner" + git config --global user.email "travis@ci.com" + git config --global user.name "Travis Runner" # If we rewrite the user's config by accident, we will mess it up # and cause subsequent tests to fail cat test/fixtures/.gitconfig >> ~/.gitconfig + - name: Ensure the "pip" command is available + run: | + # This is used unless, and before, an updated pip is installed. + ln -s pip3 /usr/bin/pip + - name: Update PyPA packages run: | - /usr/bin/python -m pip install --upgrade pip setuptools wheel + # Get the latest pip, wheel, and prior to Python 3.12, setuptools. 
+ python -m pip install -U pip $(pip freeze --all | grep -ow ^setuptools) wheel - name: Install project and test dependencies run: | - /usr/bin/python -m pip install ".[test]" + pip install ".[test]" + + - name: Show version and platform information + run: | + uname -a + command -v git python + git version + python --version + python -c 'import os, sys; print(f"sys.platform={sys.platform!r}, os.name={os.name!r}")' - name: Test with pytest run: | - set +x - /usr/bin/python -m pytest + pytest --color=yes -p no:sugar --instafail -vv diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 5e79664..a0e81a9 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -7,8 +7,10 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 - - uses: actions/setup-python@v4 - with: - python-version: "3.x" - - uses: pre-commit/action@v3.0.0 + - uses: actions/checkout@v4 + + - uses: actions/setup-python@v5 + with: + python-version: "3.x" + + - uses: pre-commit/action@v3.0.1 diff --git a/.github/workflows/pythonpackage.yml b/.github/workflows/pythonpackage.yml index a5467ef..747db62 100644 --- a/.github/workflows/pythonpackage.yml +++ b/.github/workflows/pythonpackage.yml @@ -10,42 +10,46 @@ permissions: jobs: build: - - runs-on: ubuntu-latest strategy: fail-fast: false matrix: + os: ["ubuntu-22.04", "macos-latest", "windows-latest"] python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"] + exclude: + - os: "macos-latest" + python-version: "3.7" include: - - experimental: false - - python-version: "3.12" - experimental: true + - experimental: false + + runs-on: ${{ matrix.os }} + defaults: run: - shell: /bin/bash --noprofile --norc -exo pipefail {0} + shell: bash --noprofile --norc -exo pipefail {0} steps: - uses: actions/checkout@v4 with: fetch-depth: 0 - submodules: recursive - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ 
matrix.python-version }} allow-prereleases: ${{ matrix.experimental }} - - name: Show python and git versions - run: | - python --version - git version + - name: Set up WSL (Windows) + if: startsWith(matrix.os, 'windows') + uses: Vampire/setup-wsl@v3.1.1 + with: + distribution: Alpine + additional-packages: bash - name: Prepare this repo for tests run: | - TRAVIS=yes ./init-tests-after-clone.sh + ./init-tests-after-clone.sh - - name: Prepare git configuration for tests + - name: Set git user identity and command aliases for the tests run: | git config --global user.email "travis@ci.com" git config --global user.name "Travis Runner" @@ -55,30 +59,47 @@ jobs: - name: Update PyPA packages run: | - python -m pip install --upgrade pip - if pip freeze --all | grep --quiet '^setuptools=='; then - # Python prior to 3.12 ships setuptools. Upgrade it if present. - python -m pip install --upgrade setuptools - fi - python -m pip install --upgrade wheel + # Get the latest pip, wheel, and prior to Python 3.12, setuptools. + python -m pip install -U pip $(pip freeze --all | grep -ow ^setuptools) wheel - name: Install project and test dependencies run: | pip install ".[test]" + - name: Show version and platform information + run: | + uname -a + command -v git python + git version + python --version + python -c 'import os, sys; print(f"sys.platform={sys.platform!r}, os.name={os.name!r}")' + + # For debugging hook tests on native Windows systems that may have WSL. + - name: Show bash.exe candidates (Windows) + if: startsWith(matrix.os, 'windows') + run: | + set +e + bash.exe -c 'printenv WSL_DISTRO_NAME; uname -a' + python -c 'import subprocess; subprocess.run(["bash.exe", "-c", "printenv WSL_DISTRO_NAME; uname -a"])' + continue-on-error: true + - name: Check types with mypy run: | - mypy -p git - # With new versions of mypy new issues might arise. This is a problem if there is nobody able to fix them, - # so we have to ignore errors until that changes. 
+ mypy --python-version=${{ matrix.python-version }} + env: + MYPY_FORCE_COLOR: "1" + TERM: "xterm-256color" # For color: https://github.com/python/mypy/issues/13817 + # With new versions of mypy new issues might arise. This is a problem if there is + # nobody able to fix them, so we have to ignore errors until that changes. continue-on-error: true - name: Test with pytest run: | - pytest + pytest --color=yes -p no:sugar --instafail -vv continue-on-error: false - name: Documentation + if: matrix.python-version != '3.7' run: | - pip install -r doc/requirements.txt + pip install ".[doc]" make -C doc html diff --git a/.gitignore b/.gitignore index 191e0e6..d855694 100644 --- a/.gitignore +++ b/.gitignore @@ -1,27 +1,52 @@ +# Cached Python bytecode +__pycache__/ *.py[co] + +# Other caches +.cache/ +.mypy_cache/ +.pytest_cache/ + +# Transient editor files *.swp *~ + +# Editor configuration +nbproject +*.sublime-workspace +/.vscode/ +.idea/ + +# Virtual environments .env/ env/ .venv/ venv/ -/*.egg-info + +# Build output +/*egg-info /lib/GitPython.egg-info -cover/ -.coverage -.coverage.* /build /dist /doc/_build -nbproject -*.sublime-workspace -.DS_Store -/*egg-info + +# Tox builds/environments /.tox -/.vscode/ -.idea/ -.cache/ -.mypy_cache/ -.pytest_cache/ + +# Code coverage output +cover/ +.coverage +.coverage.* + +# Monkeytype output monkeytype.sqlite3 +monkeytype.sqlite3.* + +# Manual command output output.txt + +# Finder metadata +.DS_Store + +# Files created by OSS-Fuzz when running locally +fuzz_*.pkg.spec diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5a34b8a..424cc5f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,19 +1,38 @@ repos: - - repo: https://github.com/PyCQA/flake8 - rev: 6.1.0 - hooks: - - id: flake8 - additional_dependencies: - [ - flake8-bugbear==23.9.16, - flake8-comprehensions==3.14.0, - flake8-typing-imports==1.14.0, - ] - exclude: ^doc|^git/ext/ +- repo: https://github.com/codespell-project/codespell 
+ rev: v2.3.0 + hooks: + - id: codespell + additional_dependencies: [tomli] + exclude: ^test/fixtures/ - - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.4.0 - hooks: - - id: check-merge-conflict - - id: check-toml - - id: check-yaml +- repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.6.0 + hooks: + - id: ruff + args: ["--fix"] + exclude: ^git/ext/ + - id: ruff-format + exclude: ^git/ext/ + +- repo: https://github.com/shellcheck-py/shellcheck-py + rev: v0.10.0.1 + hooks: + - id: shellcheck + args: [--color] + exclude: ^test/fixtures/polyglot$|^git/ext/ + +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.6.0 + hooks: + - id: end-of-file-fixer + exclude: ^test/fixtures/|COPYING|LICENSE + - id: check-symlinks + - id: check-toml + - id: check-yaml + - id: check-merge-conflict + +- repo: https://github.com/abravalheri/validate-pyproject + rev: v0.19 + hooks: + - id: validate-pyproject diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 0000000..9bce80f --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,36 @@ +# Read the Docs configuration file for Sphinx projects +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +# Required +version: 2 + +# Set the OS, Python version and other tools you might need. +build: + os: ubuntu-22.04 + tools: + python: "3.12" + # You can also specify other tool versions: + # nodejs: "20" + # rust: "1.70" + # golang: "1.20" + +# Build documentation in the "doc/" directory with Sphinx. +sphinx: + configuration: doc/source/conf.py + # You can configure Sphinx to use a different builder, for instance use the dirhtml builder for simpler URLs + # builder: "dirhtml" + # Fail on all warnings to avoid broken references + fail_on_warning: true + +# Optionally build your docs in additional formats such as PDF and ePub. +formats: all + +# Optional but recommended, declare the Python requirements required +# to build your documentation. 
+# See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html +python: + install: + - method: pip + path: . + extra_requirements: + - doc diff --git a/AUTHORS b/AUTHORS index 3e99ff7..45b14c9 100644 --- a/AUTHORS +++ b/AUTHORS @@ -52,5 +52,8 @@ Contributors are: -Joseph Hale <me _at_ jhale.dev> -Santos Gallegos <stsewd _at_ proton.me> -Wenhan Zhu <wzhu.cosmos _at_ gmail.com> +-Eliah Kagan <eliah.kagan _at_ gmail.com> +-Ethan Lin <et.repositories _at_ gmail.com> +-Jonas Scharpf <jonas.scharpf _at_ checkmk.com> Portions derived from other open source works and are clearly marked. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index e108f1b..8536d7f 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -8,3 +8,8 @@ The following is a short step-by-step rundown of what one typically would do to - Try to avoid massive commits and prefer to take small steps, with one commit for each. - Feel free to add yourself to AUTHORS file. - Create a pull request. + +## Fuzzing Test Specific Documentation + +For details related to contributing to the fuzzing test suite and OSS-Fuzz integration, please +refer to the dedicated [fuzzing README](./fuzzing/README.md). 
diff --git a/FUNDING.json b/FUNDING.json new file mode 100644 index 0000000..bf3faa6 --- /dev/null +++ b/FUNDING.json @@ -0,0 +1,7 @@ +{ + "drips": { + "ethereum": { + "ownedBy": "0xD0d4dCFc194ec24bCc777e635289e0b10E1a7b87" + } + } +} diff --git a/Makefile b/Makefile index 3809024..d4f9acf 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ .PHONY: all clean release force_release all: - @grep -Ee '^[a-z].*:' Makefile | cut -d: -f1 | grep -vF all + @awk -F: '/^[[:alpha:]].*:/ && !/^all:/ {print $$1}' Makefile clean: rm -rf build/ dist/ .eggs/ .tox/ diff --git a/README.md b/README.md index dbec360..59c6f99 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,8 @@ probably the skills to scratch that itch of mine: implement `git` in a way that If you like the idea and want to learn more, please head over to [gitoxide](https://github.com/Byron/gitoxide), an implementation of 'git' in [Rust](https://www.rust-lang.org). +*(Please note that `gitoxide` is not currently available for use in Python, and that Rust is required.)* + ## GitPython GitPython is a python library used to interact with git repositories, high-level like git-porcelain, @@ -37,9 +39,9 @@ The project is open to contributions of all kinds, as well as new maintainers. ### REQUIREMENTS -GitPython needs the `git` executable to be installed on the system and available in your `PATH` for most operations. -If it is not in your `PATH`, you can help GitPython find it by setting -the `GIT_PYTHON_GIT_EXECUTABLE=<path/to/git>` environment variable. +GitPython needs the `git` executable to be installed on the system and available in your +`PATH` for most operations. If it is not in your `PATH`, you can help GitPython find it +by setting the `GIT_PYTHON_GIT_EXECUTABLE=<path/to/git>` environment variable. 
- Git (1.7.x or newer) - Python >= 3.7 @@ -55,7 +57,7 @@ GitPython and its required package dependencies can be installed in any of the f To obtain and install a copy [from PyPI](https://pypi.org/project/GitPython/), run: -```bash +```sh pip install GitPython ``` @@ -65,7 +67,7 @@ pip install GitPython If you have downloaded the source code, run this from inside the unpacked `GitPython` directory: -```bash +```sh pip install . ``` @@ -73,27 +75,44 @@ pip install . To clone the [the GitHub repository](https://github.com/gitpython-developers/GitPython) from source to work on the code, you can do it like so: -```bash +```sh git clone https://github.com/gitpython-developers/GitPython cd GitPython -git fetch --tags ./init-tests-after-clone.sh ``` +On Windows, `./init-tests-after-clone.sh` can be run in a Git Bash shell. + If you are cloning [your own fork](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/about-forks), then replace the above `git clone` command with one that gives the URL of your fork. Or use this [`gh`](https://cli.github.com/) command (assuming you have `gh` and your fork is called `GitPython`): -```bash +```sh gh repo clone GitPython ``` -Having cloned the repo, create and activate your [virtual environment](https://docs.python.org/3/tutorial/venv.html). Then make an [editable install](https://pip.pypa.io/en/stable/topics/local-project-installs/#editable-installs): +Having cloned the repo, create and activate your [virtual environment](https://docs.python.org/3/tutorial/venv.html). + +Then make an [editable install](https://pip.pypa.io/en/stable/topics/local-project-installs/#editable-installs): -```bash +```sh pip install -e ".[test]" ``` In the less common case that you do not want to install test dependencies, `pip install -e .` can be used instead. 
+#### With editable *dependencies* (not preferred, and rarely needed) + +In rare cases, you may want to work on GitPython and one or both of its [gitdb](https://github.com/gitpython-developers/gitdb) and [smmap](https://github.com/gitpython-developers/smmap) dependencies at the same time, with changes in your local working copy of gitdb or smmap immediately reflected in the behavior of your local working copy of GitPython. This can be done by making editable installations of those dependencies in the same virtual environment where you install GitPython. + +If you want to do that *and* you want the versions in GitPython's git submodules to be used, then pass `-e git/ext/gitdb` and/or `-e git/ext/gitdb/gitdb/ext/smmap` to `pip install`. This can be done in any order, and in separate `pip install` commands or the same one, so long as `-e` appears before *each* path. For example, you can install GitPython, gitdb, and smmap editably in the currently active virtual environment this way: + +```sh +pip install -e ".[test]" -e git/ext/gitdb -e git/ext/gitdb/gitdb/ext/smmap +``` + +The submodules must have been cloned for that to work, but that will already be the case if you have run `./init-tests-after-clone.sh`. You can use `pip list` to check which packages are installed editably and which are installed normally. + +To reiterate, this approach should only rarely be used. For most development it is preferable to allow the gitdb and smmap dependencices to be retrieved automatically from PyPI in their latest stable packaged versions. + ### Limitations #### Leakage of System Resources @@ -114,60 +133,65 @@ See [Issue #525](https://github.com/gitpython-developers/GitPython/issues/525). ### RUNNING TESTS -_Important_: Right after cloning this repository, please be sure to have -executed `git fetch --tags` followed by the `./init-tests-after-clone.sh` -script in the repository root. Otherwise you will encounter test failures. 
- -On _Windows_, make sure you have `git-daemon` in your PATH. For MINGW-git, the `git-daemon.exe` -exists in `Git\mingw64\libexec\git-core\`; CYGWIN has no daemon, but should get along fine -with MINGW's. +_Important_: Right after cloning this repository, please be sure to have executed +the `./init-tests-after-clone.sh` script in the repository root. Otherwise +you will encounter test failures. #### Install test dependencies Ensure testing libraries are installed. This is taken care of already if you installed with: -```bash +```sh pip install -e ".[test]" ``` -Otherwise, you can run: - -```bash -pip install -r test-requirements.txt -``` +If you had installed with a command like `pip install -e .` instead, you can still run +the above command to add the testing dependencies. #### Test commands To test, run: -```bash +```sh pytest ``` -To lint, run: +To lint, and apply some linting fixes as well as automatic code formatting, run: -```bash +```sh pre-commit run --all-files ``` +This includes the linting and autoformatting done by Ruff, as well as some other checks. + To typecheck, run: -```bash -mypy -p git +```sh +mypy ``` -For automatic code formatting, run: +#### CI (and tox) -```bash -black . -``` +Style and formatting checks, and running tests on all the different supported Python versions, will be performed: -Configuration for flake8 is in the `./.flake8` file. +- Upon submitting a pull request. +- On each push, *if* you have a fork with GitHub Actions enabled. +- Locally, if you run [`tox`](https://tox.wiki/) (this skips any Python versions you don't have installed). -Configurations for `mypy`, `pytest`, `coverage.py`, and `black` are in `./pyproject.toml`. +#### Configuration files -The same linting and testing will also be performed against different supported python versions -upon submitting a pull request (or on each push if you have a fork with a "main" branch and actions enabled). 
+Specific tools are all configured in the `./pyproject.toml` file: + +- `pytest` (test runner) +- `coverage.py` (code coverage) +- `ruff` (linter and formatter) +- `mypy` (type checker) + +Orchestration tools: + +- Configuration for `pre-commit` is in the `./.pre-commit-config.yaml` file. +- Configuration for `tox` is in `./tox.ini`. +- Configuration for GitHub Actions (CI) is in files inside `./.github/workflows/`. ### Contributions @@ -188,66 +212,13 @@ Please have a look at the [contributions file][contributing]. ### How to make a new release -- Update/verify the **version** in the `VERSION` file. -- Update/verify that the `doc/source/changes.rst` changelog file was updated. -- Commit everything. -- Run `git tag -s <version>` to tag the version in Git. -- _Optionally_ create and activate a [virtual environment](https://packaging.python.org/en/latest/guides/installing-using-pip-and-virtual-environments/#creating-a-virtual-environment) using `venv` or `virtualenv`.\ -(When run in a virtual environment, the next step will automatically take care of installing `build` and `twine` in it.) -- Run `make release`. -- Close the milestone mentioned in the _changelog_ and create a new one. _Do not reuse milestones by renaming them_. -- Go to [GitHub Releases](https://github.com/gitpython-developers/GitPython/releases) and publish a new one with the recently pushed tag. Generate the changelog. - -### How to verify a release (DEPRECATED) - -Note that what follows is deprecated and future releases won't be signed anymore. -More details about how it came to that can be found [in this issue](https://github.com/gitpython-developers/gitdb/issues/77). - ----- - -Please only use releases from `pypi` as you can verify the respective source -tarballs. 
- -This script shows how to verify the tarball was indeed created by the authors of -this project: - -```bash -curl https://files.pythonhosted.org/packages/09/bc/ae32e07e89cc25b9e5c793d19a1e5454d30a8e37d95040991160f942519e/GitPython-3.1.8-py3-none-any.whl > gitpython.whl -curl https://files.pythonhosted.org/packages/09/bc/ae32e07e89cc25b9e5c793d19a1e5454d30a8e37d95040991160f942519e/GitPython-3.1.8-py3-none-any.whl.asc > gitpython-signature.asc -gpg --verify gitpython-signature.asc gitpython.whl -``` - -which outputs - -```bash -gpg: Signature made Fr 4 Sep 10:04:50 2020 CST -gpg: using RSA key 27C50E7F590947D7273A741E85194C08421980C9 -gpg: Good signature from "Sebastian Thiel (YubiKey USB-C) <byronimo@gmail.com>" [ultimate] -gpg: aka "Sebastian Thiel (In Rust I trust) <sebastian.thiel@icloud.com>" [ultimate] -``` - -You can verify that the keyid indeed matches the release-signature key provided in this -repository by looking at the keys details: - -```bash -gpg --list-packets ./release-verification-key.asc -``` - -You can verify that the commit adding it was also signed by it using: - -```bash -git show --show-signature ./release-verification-key.asc -``` - -If you would like to trust it permanently, you can import and sign it: - -```bash -gpg --import ./release-verification-key.asc -gpg --edit-key 4C08421980C9 - -> sign -> save -``` +1. Update/verify the **version** in the `VERSION` file. +2. Update/verify that the `doc/source/changes.rst` changelog file was updated. It should include a link to the forthcoming release page: `https://github.com/gitpython-developers/GitPython/releases/tag/<version>` +3. Commit everything. +4. Run `git tag -s <version>` to tag the version in Git. +5. _Optionally_ create and activate a [virtual environment](https://packaging.python.org/en/latest/guides/installing-using-pip-and-virtual-environments/#creating-a-virtual-environment). (Then the next step can install `build` and `twine`.) +6. Run `make release`. +7. 
Go to [GitHub Releases](https://github.com/gitpython-developers/GitPython/releases) and publish a new one with the recently pushed tag. Generate the changelog. ### Projects using GitPython @@ -267,7 +238,10 @@ gpg --edit-key 4C08421980C9 ### LICENSE -[New BSD License](https://opensource.org/license/bsd-3-clause/). See the [LICENSE file](https://github.com/gitpython-developers/GitPython/blob/main/license). +[3-Clause BSD License](https://opensource.org/license/bsd-3-clause/), also known as the New BSD License. See the [LICENSE file][license]. + +One file exclusively used for fuzz testing is subject to [a separate license, detailed here](./fuzzing/README.md#license). +This file is not included in the wheel or sdist packages published by the maintainers of GitPython. [contributing]: https://github.com/gitpython-developers/GitPython/blob/main/CONTRIBUTING.md -[license]: https://github.com/gitpython-developers/GitPython/blob/main/license +[license]: https://github.com/gitpython-developers/GitPython/blob/main/LICENSE diff --git a/SECURITY.md b/SECURITY.md index cf25c09..d39425b 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -2,8 +2,7 @@ ## Supported Versions -Only the latest version of GitPython can receive security updates. If a vulnerability is discovered, a fix can be issued in a new release, while older releases -are likely to be yanked. +Only the latest version of GitPython can receive security updates. If a vulnerability is discovered, a fix can be issued in a new release. | Version | Supported | | ------- | ------------------ | @@ -12,5 +11,4 @@ are likely to be yanked. ## Reporting a Vulnerability -Please report private portions of a vulnerability to sebastian.thiel@icloud.com that would help to reproduce and fix it. To receive updates on progress and provide -general information to the public, you can create an issue [on the issue tracker](https://github.com/gitpython-developers/GitPython/issues). 
+Please report private portions of a vulnerability to <https://github.com/gitpython-developers/GitPython/security/advisories/new>. Doing so helps to receive updates and collaborate on the matter, without disclosing it publicly right away.
diff --git a/VERSION b/VERSION
index 1f1a397..e6af1c4 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-3.1.37
+3.1.44
diff --git a/build-release.sh b/build-release.sh
index 5840e44..1a8dce2 100755
--- a/build-release.sh
+++ b/build-release.sh
@@ -1,26 +1,30 @@
 #!/bin/bash
 #
+# This file is part of GitPython and is released under the
+# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/
+#
 # This script builds a release. If run in a venv, it auto-installs its tools.
 # You may want to run "make release" instead of running this script directly.
 
 set -eEu
 
 function release_with() {
-    $1 -m build --sdist --wheel
+    "$1" -m build --sdist --wheel
+}
+
+function suggest_venv() {
+    local venv_cmd='python -m venv env && source env/bin/activate'
+    printf "HELP: To avoid this error, use a virtual-env with '%s' instead.\n" "$venv_cmd"
 }
 
-if test -n "${VIRTUAL_ENV:-}"; then
+if test -n "${VIRTUAL_ENV-}"; then
     deps=(build twine) # Install twine along with build, as we need it later.
     echo "Virtual environment detected. Adding packages: ${deps[*]}"
     pip install --quiet --upgrade "${deps[@]}"
     echo 'Starting the build.'
     release_with python
 else
-    function suggest_venv() {
-        venv_cmd='python -m venv env && source env/bin/activate'
-        printf "HELP: To avoid this error, use a virtual-env with '%s' instead.\n" "$venv_cmd"
-    }
     trap suggest_venv ERR # This keeps the original exit (error) code.
     echo 'Starting the build.'
-    release_with python3 # Outside a venv, use python3.
+    release_with python3  # Outside a venv, use python3.
fi diff --git a/check-version.sh b/check-version.sh index c50bf49..579cf78 100755 --- a/check-version.sh +++ b/check-version.sh @@ -1,5 +1,8 @@ #!/bin/bash # +# This file is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ +# # This script checks if we are in a consistent state to build a new release. # See the release instructions in README.md for the steps to make this pass. # You may want to run "make release" instead of running this script directly. @@ -10,29 +13,39 @@ trap 'echo "$0: Check failed. Stopping." >&2' ERR readonly version_path='VERSION' readonly changes_path='doc/source/changes.rst' +function check_status() { + git status -s "$@" + test -z "$(git status -s "$@")" +} + +function get_latest_tag() { + local config_opts + printf -v config_opts ' -c versionsort.suffix=-%s' alpha beta pre rc RC + # shellcheck disable=SC2086 # Deliberately word-splitting the arguments. + git $config_opts tag -l '[0-9]*' --sort=-v:refname | head -n1 +} + echo 'Checking current directory.' test "$(cd -- "$(dirname -- "$0")" && pwd)" = "$(pwd)" # Ugly, but portable. echo "Checking that $version_path and $changes_path exist and have no uncommitted changes." test -f "$version_path" test -f "$changes_path" -git status -s -- "$version_path" "$changes_path" -test -z "$(git status -s -- "$version_path" "$changes_path")" +check_status -- "$version_path" "$changes_path" # This section can be commented out, if absolutely necessary. echo 'Checking that ALL changes are committed.' 
-git status -s --ignore-submodules -test -z "$(git status -s --ignore-submodules)" +check_status --ignore-submodules -version_version="$(cat "$version_path")" +version_version="$(<"$version_path")" changes_version="$(awk '/^[0-9]/ {print $0; exit}' "$changes_path")" -config_opts="$(printf ' -c versionsort.suffix=-%s' alpha beta pre rc RC)" -latest_tag="$(git $config_opts tag -l '[0-9]*' --sort=-v:refname | head -n1)" +latest_tag="$(get_latest_tag)" head_sha="$(git rev-parse HEAD)" latest_tag_sha="$(git rev-parse "${latest_tag}^{commit}")" # Display a table of all the current version, tag, and HEAD commit information. -echo $'\nThe VERSION must be the same in all locations, and so must the HEAD and tag SHA' +echo +echo 'The VERSION must be the same in all locations, and so must the HEAD and tag SHA' printf '%-14s = %s\n' 'VERSION file' "$version_version" \ 'changes.rst' "$changes_version" \ 'Latest tag' "$latest_tag" \ diff --git a/doc/Makefile b/doc/Makefile index ef2d60e..ddeadbd 100644 --- a/doc/Makefile +++ b/doc/Makefile @@ -2,6 +2,7 @@ # # You can set these variables from the command line. +BUILDDIR = build SPHINXOPTS = -W SPHINXBUILD = sphinx-build PAPER = @@ -9,7 +10,7 @@ PAPER = # Internal variables. PAPEROPT_a4 = -D latex_paper_size=a4 PAPEROPT_letter = -D latex_paper_size=letter -ALLSPHINXOPTS = -d build/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source +ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source .PHONY: help clean html web pickle htmlhelp latex changes linkcheck @@ -24,52 +25,52 @@ help: @echo " linkcheck to check all external links for integrity" clean: - -rm -rf build/* + -rm -rf $(BUILDDIR)/* html: - mkdir -p build/html build/doctrees - $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) build/html + mkdir -p $(BUILDDIR)/html $(BUILDDIR)/doctrees + $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html @echo - @echo "Build finished. The HTML pages are in build/html." + @echo "Build finished. 
The HTML pages are in $(BUILDDIR)/html." pickle: - mkdir -p build/pickle build/doctrees - $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) build/pickle + mkdir -p $(BUILDDIR)/pickle $(BUILDDIR)/doctrees + $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle @echo @echo "Build finished; now you can process the pickle files." web: pickle json: - mkdir -p build/json build/doctrees - $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) build/json + mkdir -p $(BUILDDIR)/json $(BUILDDIR)/doctrees + $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json @echo @echo "Build finished; now you can process the JSON files." htmlhelp: - mkdir -p build/htmlhelp build/doctrees - $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) build/htmlhelp + mkdir -p $(BUILDDIR)/htmlhelp $(BUILDDIR)/doctrees + $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp @echo @echo "Build finished; now you can run HTML Help Workshop with the" \ - ".hhp project file in build/htmlhelp." + ".hhp project file in $(BUILDDIR)/htmlhelp." latex: - mkdir -p build/latex build/doctrees - $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) build/latex + mkdir -p $(BUILDDIR)/latex $(BUILDDIR)/doctrees + $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo - @echo "Build finished; the LaTeX files are in build/latex." + @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." @echo "Run \`make all-pdf' or \`make all-ps' in that directory to" \ "run these through (pdf)latex." changes: - mkdir -p build/changes build/doctrees - $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) build/changes + mkdir -p $(BUILDDIR)/changes $(BUILDDIR)/doctrees + $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes @echo - @echo "The overview file is in build/changes." + @echo "The overview file is in $(BUILDDIR)/changes." 
linkcheck: - mkdir -p build/linkcheck build/doctrees - $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) build/linkcheck + mkdir -p $(BUILDDIR)/linkcheck $(BUILDDIR)/doctrees + $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck @echo @echo "Link check complete; look for any errors in the above output " \ - "or in build/linkcheck/output.txt." + "or in $(BUILDDIR)/linkcheck/output.txt." diff --git a/doc/requirements.txt b/doc/requirements.txt index 41a7c90..81140d8 100644 --- a/doc/requirements.txt +++ b/doc/requirements.txt @@ -1,3 +1,3 @@ -sphinx==4.3.0 +sphinx >= 7.1.2, < 7.2 sphinx_rtd_theme sphinx-autodoc-typehints diff --git a/doc/source/changes.rst b/doc/source/changes.rst index a789b06..00a3c66 100644 --- a/doc/source/changes.rst +++ b/doc/source/changes.rst @@ -2,6 +2,51 @@ Changelog ========= +3.1.44 +====== + +See the following for all changes. +https://github.com/gitpython-developers/GitPython/releases/tag/3.1.44 + +3.1.43 +====== + +A major visible change will be the added deprecation- or user-warnings, +and greatly improved typing. + +See the following for all changes. +https://github.com/gitpython-developers/GitPython/releases/tag/3.1.43 + +3.1.42 +====== + +See the following for all changes. +https://github.com/gitpython-developers/GitPython/releases/tag/3.1.42 + +3.1.41 +====== + +This release is relevant for security as it fixes a possible arbitrary +code execution on Windows. + +See this PR for details: https://github.com/gitpython-developers/GitPython/pull/1792 +An advisory is available soon at: https://github.com/gitpython-developers/GitPython/security/advisories/GHSA-2mqj-m65w-jghx + +See the following for all changes. +https://github.com/gitpython-developers/GitPython/releases/tag/3.1.41 + +3.1.40 +====== + +See the following for all changes. +https://github.com/gitpython-developers/GitPython/releases/tag/3.1.40 + +3.1.38 +====== + +See the following for all changes. 
+https://github.com/gitpython-developers/GitPython/releases/tag/3.1.38 + 3.1.37 ====== diff --git a/doc/source/conf.py b/doc/source/conf.py index 54f1f47..8097624 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -1,33 +1,30 @@ -# -*- coding: utf-8 -*- -# -# GitPython documentation build configuration file, created by +# GitPython documentation build configuration file, originally created by # sphinx-quickstart on Sat Jan 24 11:51:01 2009. # # This file is execfile()d with the current directory set to its containing dir. # -# The contents of this file are pickled, so don't put values in the namespace -# that aren't pickleable (module imports are okay, they're removed automatically). +# The contents of this file are pickled, so don't put values in the namespace that +# aren't pickleable (module imports are okay, they're removed automatically). # -# Note that not all possible configuration values are present in this -# autogenerated file. +# Note that not all possible configuration values are present in this autogenerated +# file. # -# All configuration values have a default; values that are commented out -# serve to show the default. +# All configuration values have a default; values that are commented out serve to show +# the default. -import sys import os +import sys -# If your extensions are in another directory, add it here. If the directory -# is relative to the documentation root, use os.path.abspath to make it -# absolute, like shown here. +# If your extensions are in another directory, add it here. If the directory is relative +# to the documentation root, use os.path.abspath to make it absolute, like shown here. # sys.path.append(os.path.abspath('.')) sys.path.insert(0, os.path.abspath("../..")) # General configuration # --------------------- -# Add any Sphinx extension module names here, as strings. They can be extensions -# coming with Sphinx (named 'sphinx.ext.*') or your custom ones. +# Add any Sphinx extension module names here, as strings. 
They can be extensions coming +# with Sphinx (named 'sphinx.ext.*') or your custom ones. extensions = ["sphinx.ext.autodoc", "sphinx.ext.doctest"] # Add any paths that contain templates here, relative to this directory. @@ -46,9 +43,8 @@ master_doc = "index" project = "GitPython" copyright = "Copyright (C) 2008, 2009 Michael Trier and contributors, 2010-2015 Sebastian Thiel" -# The version info for the project you're documenting, acts as replacement for -# |version| and |release|, also used in various other places throughout the -# built documents. +# The version info for the project you're documenting, acts as replacement for |version| +# and |release|, also used in various other places throughout the built documents. # # The short X.Y version. with open(os.path.join(os.path.dirname(__file__), "..", "..", "VERSION")) as fd: @@ -57,8 +53,8 @@ version = VERSION # The full version, including alpha/beta/rc tags. release = VERSION -# The language for content autogenerated by Sphinx. Refer to documentation -# for a list of supported languages. +# The language for content autogenerated by Sphinx. Refer to documentation for a list of +# supported languages. # language = None # There are two options for replacing |today|: either, you set today to some @@ -70,8 +66,8 @@ release = VERSION # List of documents that shouldn't be included in the build. # unused_docs = [] -# List of directories, relative to source directory, that shouldn't be searched -# for source files. +# List of directories, relative to source directory, that shouldn't be searched for +# source files. exclude_trees = ["build"] # The reST default role (used for this markup: `text`) to use for all documents. @@ -80,17 +76,19 @@ exclude_trees = ["build"] # If true, '()' will be appended to :func: etc. cross-reference text. # add_function_parentheses = True -# If true, the current module name will be prepended to all description -# unit titles (such as .. function::). 
+# If true, the current module name will be prepended to all description unit titles +# (such as .. function::). # add_module_names = True -# If true, sectionauthor and moduleauthor directives will be shown in the -# output. They are ignored by default. +# If true, sectionauthor and moduleauthor directives will be shown in the output. +# They are ignored by default. # show_authors = False # The name of the Pygments (syntax highlighting) style to use. pygments_style = "sphinx" +manpages_url = "https://git-scm.com/docs/{page}" + # Options for HTML output # ----------------------- @@ -98,40 +96,39 @@ pygments_style = "sphinx" html_theme = "sphinx_rtd_theme" html_theme_options = {} -# The name for this set of Sphinx documents. If None, it defaults to -# "<project> v<release> documentation". +# The name for this set of Sphinx documents. +# If None, it defaults to "<project> v<release> documentation". # html_title = None -# A shorter title for the navigation bar. Default is the same as html_title. +# A shorter title for the navigation bar. Default is the same as html_title. # html_short_title = None -# The name of an image file (relative to this directory) to place at the top -# of the sidebar. +# The name of an image file (relative to this directory) to place at the top of the +# sidebar. # html_logo = None -# The name of an image file (within the static path) to use as favicon of the -# docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 -# pixels large. +# The name of an image file (within the static path) to use as favicon of the docs. +# This file should be a Windows icon file (.ico) being 16x16 or 32x32 pixels large. # html_favicon = None -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". 
+# Add any paths that contain custom static files (such as style sheets) here, relative +# to this directory. They are copied after the builtin static files, so a file named +# "default.css" will overwrite the builtin "default.css". html_static_path = [] -# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, -# using the given strftime format. +# If not '', a 'Last updated on:' timestamp is inserted at every page bottom, using the +# given strftime format. # html_last_updated_fmt = '%b %d, %Y' -# If true, SmartyPants will be used to convert quotes and dashes to -# typographically correct entities. +# If true, SmartyPants will be used to convert quotes and dashes to typographically +# correct entities. # html_use_smartypants = True # Custom sidebar templates, maps document names to template names. # html_sidebars = {} -# Additional templates that should be rendered to pages, maps page names to -# template names. +# Additional templates that should be rendered to pages, maps page names to template +# names. # html_additional_pages = {} # If false, no module index is generated. @@ -146,9 +143,9 @@ html_static_path = [] # If true, the reST sources are included in the HTML build as _sources/<name>. # html_copy_source = True -# If true, an OpenSearch description file will be output, and all pages will -# contain a <link> tag referring to it. The value of this option must be the -# base URL from which the finished HTML is served. +# If true, an OpenSearch description file will be output, and all pages will contain a +# <link> tag referring to it. The value of this option must be the base URL from which +# the finished HTML is served. # html_use_opensearch = '' # If nonempty, this is the file name suffix for HTML files (e.g. ".xhtml"). @@ -170,11 +167,11 @@ htmlhelp_basename = "gitpythondoc" # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, author, document class [howto/manual]). 
latex_documents = [ - ("index", "GitPython.tex", r"GitPython Documentation", r"Michael Trier", "manual"), + ("index", "GitPython.tex", "GitPython Documentation", "Michael Trier", "manual"), ] -# The name of an image file (relative to this directory) to place at the top of -# the title page. +# The name of an image file (relative to this directory) to place at the top of the +# title page. # latex_logo = None # For "manual" documents, if this is true, then toplevel headings are parts, diff --git a/doc/source/index.rst b/doc/source/index.rst index 72db8ee..ca5229a 100644 --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -21,4 +21,3 @@ Indices and tables * :ref:`genindex` * :ref:`modindex` * :ref:`search` - diff --git a/doc/source/intro.rst b/doc/source/intro.rst index 4f22a09..d053bd1 100644 --- a/doc/source/intro.rst +++ b/doc/source/intro.rst @@ -122,4 +122,3 @@ License Information =================== GitPython is licensed under the New BSD License. See the LICENSE file for more information. - diff --git a/doc/source/reference.rst b/doc/source/reference.rst index 68a7f0b..13dd38d 100644 --- a/doc/source/reference.rst +++ b/doc/source/reference.rst @@ -3,13 +3,16 @@ API Reference ============= -Version -------- +Top-Level +--------- .. py:data:: git.__version__ Current GitPython version. +.. 
automodule:: git + :members: refresh + Objects.Base ------------ @@ -17,7 +20,7 @@ Objects.Base :members: :undoc-members: :special-members: - + Objects.Blob ------------ @@ -25,7 +28,7 @@ Objects.Blob :members: :undoc-members: :special-members: - + Objects.Commit -------------- @@ -33,7 +36,7 @@ Objects.Commit :members: :undoc-members: :special-members: - + Objects.Tag ----------- @@ -73,7 +76,7 @@ Objects.Submodule.root :members: :undoc-members: :special-members: - + Objects.Submodule.util ---------------------- @@ -81,7 +84,7 @@ Objects.Submodule.util :members: :undoc-members: :special-members: - + Objects.Util ------------- @@ -105,7 +108,7 @@ Index.Functions :members: :undoc-members: :special-members: - + Index.Types ----------- @@ -113,7 +116,7 @@ Index.Types :members: :undoc-members: :special-members: - + Index.Util ------------- @@ -121,7 +124,7 @@ Index.Util :members: :undoc-members: :special-members: - + GitCmd ------ @@ -137,7 +140,7 @@ Config :members: :undoc-members: :special-members: - + Diff ---- @@ -154,7 +157,7 @@ Exceptions :undoc-members: :special-members: - + Refs.symbolic ------------- @@ -162,7 +165,7 @@ Refs.symbolic :members: :undoc-members: :special-members: - + Refs.reference -------------- @@ -178,7 +181,7 @@ Refs.head :members: :undoc-members: :special-members: - + Refs.tag ------------ @@ -186,7 +189,7 @@ Refs.tag :members: :undoc-members: :special-members: - + Refs.remote ------------ @@ -194,7 +197,7 @@ Refs.remote :members: :undoc-members: :special-members: - + Refs.log ------------ @@ -202,7 +205,7 @@ Refs.log :members: :undoc-members: :special-members: - + Remote ------ @@ -218,7 +221,7 @@ Repo.Base :members: :undoc-members: :special-members: - + Repo.Functions -------------- @@ -227,6 +230,30 @@ Repo.Functions :undoc-members: :special-members: +Compat +------ + +.. automodule:: git.compat + :members: + :undoc-members: + :special-members: + +DB +-- + +.. 
automodule:: git.db + :members: + :undoc-members: + :special-members: + +Types +----- + +.. automodule:: git.types + :members: + :undoc-members: + :special-members: + Util ---- diff --git a/doc/source/roadmap.rst b/doc/source/roadmap.rst index a573df3..34c9536 100644 --- a/doc/source/roadmap.rst +++ b/doc/source/roadmap.rst @@ -6,4 +6,3 @@ The full list of milestones including associated tasks can be found on GitHub: https://github.com/gitpython-developers/GitPython/issues Select the respective milestone to filter the list of issues accordingly. - diff --git a/doc/source/tutorial.rst b/doc/source/tutorial.rst index fcbc18b..fd3b14c 100644 --- a/doc/source/tutorial.rst +++ b/doc/source/tutorial.rst @@ -413,7 +413,7 @@ If you obtained your submodule object by traversing a tree object which is not r you have to inform the submodule about its actual commit to retrieve the data from by using the ``set_parent_commit(...)`` method. -The special :class:`RootModule <git.objects.submodule.root.RootModule>` type allows you to treat your master repository as root of a hierarchy of submodules, which allows very convenient submodule handling. Its ``update(...)`` method is reimplemented to provide an advanced way of updating submodules as they change their values over time. The update method will track changes and make sure your working tree and submodule checkouts stay consistent, which is very useful in case submodules get deleted or added to name just two of the handled cases. +The special :class:`RootModule <git.objects.submodule.root.RootModule>` type allows you to treat your superproject (master repository) as root of a hierarchy of submodules, which allows very convenient submodule handling. Its ``update(...)`` method is reimplemented to provide an advanced way of updating submodules as they change their values over time. 
The update method will track changes and make sure your working tree and submodule checkouts stay consistent, which is very useful in case submodules get deleted or added to name just two of the handled cases. Additionally, GitPython adds functionality to track a specific branch, instead of just a commit. Supported by customized update methods, you are able to automatically update submodules to the latest revision available in the remote repository, as well as to keep track of changes and movements of these submodules. To use it, set the name of the branch you want to track to the ``submodule.$name.branch`` option of the *.gitmodules* file, and use GitPython update methods on the resulting repository with the ``to_latest_revision`` parameter turned on. In the latter case, the sha of your submodule will be ignored, instead a local tracking branch will be updated to the respective remote branch automatically, provided there are no local changes. The resulting behaviour is much like the one of svn::externals, which can be useful in times. @@ -545,4 +545,3 @@ And even more ... There is more functionality in there, like the ability to archive repositories, get stats and logs, blame, and probably a few other things that were not mentioned here. Check the unit tests for an in-depth introduction on how each function is supposed to be used. - diff --git a/fuzzing/LICENSE-APACHE b/fuzzing/LICENSE-APACHE new file mode 100644 index 0000000..8dada3e --- /dev/null +++ b/fuzzing/LICENSE-APACHE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. 
+ + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright {yyyy} {name of copyright owner} + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/fuzzing/LICENSE-BSD b/fuzzing/LICENSE-BSD new file mode 120000 index 0000000..ea5b606 --- /dev/null +++ b/fuzzing/LICENSE-BSD @@ -0,0 +1 @@ +../LICENSE \ No newline at end of file diff --git a/fuzzing/README.md b/fuzzing/README.md new file mode 100644 index 0000000..286f529 --- /dev/null +++ b/fuzzing/README.md @@ -0,0 +1,226 @@ +# Fuzzing GitPython + +[][oss-fuzz-issue-tracker] + +This directory contains files related to GitPython's suite of fuzz tests that are executed daily on automated +infrastructure provided by [OSS-Fuzz][oss-fuzz-repo]. This document aims to provide necessary information for working +with fuzzing in GitPython. 
+
+The latest details regarding OSS-Fuzz test status, including build logs and coverage reports, are available
+on [the Open Source Fuzzing Introspection website](https://introspector.oss-fuzz.com/project-profile?project=gitpython).
+
+## How to Contribute
+
+There are many ways to contribute to GitPython's fuzzing efforts! Contributions are welcomed through issues,
+discussions, or pull requests on this repository.
+
+Areas that are particularly appreciated include:
+
+- **Tackling the existing backlog of open issues**. While fuzzing is an effective way to identify bugs, that information
+  isn't useful unless they are fixed. If you are not sure where to start, the issues tab is a great place to get ideas!
+- **Improvements to this (or other) documentation** make it easier for new contributors to get involved, so even small
+  improvements can have a large impact over time. If you see something that could be made easier by a documentation
+  update of any size, please consider suggesting it!
+
+For everything else, such as expanding test coverage, optimizing test performance, or enhancing error detection
+capabilities, jump into the "Getting Started" section below.
+
+## Getting Started with Fuzzing GitPython
+
+> [!TIP]
+> **New to fuzzing or unfamiliar with OSS-Fuzz?**
+>
+> These resources are an excellent place to start:
+>
+> - [OSS-Fuzz documentation][oss-fuzz-docs] - Continuous fuzzing service for open source software.
+> - [Google/fuzzing][google-fuzzing-repo] - Tutorials, examples, discussions, research proposals, and other resources
+    related to fuzzing.
+> - [CNCF Fuzzing Handbook](https://github.com/cncf/tag-security/blob/main/security-fuzzing-handbook/handbook-fuzzing.pdf) -
+    A comprehensive guide for fuzzing open source software. 
+> - [Efficient Fuzzing Guide by The Chromium Project](https://chromium.googlesource.com/chromium/src/+/main/testing/libfuzzer/efficient_fuzzing.md) - + Explores strategies to enhance the effectiveness of your fuzz tests, recommended for those looking to optimize their + testing efforts. + +### Setting Up Your Local Environment + +Before contributing to fuzzing efforts, ensure Python and Docker are installed on your machine. Docker is required for +running fuzzers in containers provided by OSS-Fuzz and for safely executing test files directly. [Install Docker](https://docs.docker.com/get-docker/) following the official guide if you do not already have it. + +### Understanding Existing Fuzz Targets + +Review the `fuzz-targets/` directory to familiarize yourself with how existing tests are implemented. See +the [Files & Directories Overview](#files--directories-overview) for more details on the directory structure. + +### Contributing to Fuzz Tests + +Start by reviewing the [Atheris documentation][atheris-repo] and the section +on [Running Fuzzers Locally](#running-fuzzers-locally) to begin writing or improving fuzz tests. + +## Files & Directories Overview + +The `fuzzing/` directory is organized into three key areas: + +### Fuzz Targets (`fuzz-targets/`) + +Contains Python files for each fuzz test. + +**Things to Know**: + +- Each fuzz test targets a specific part of GitPython's functionality. +- Test files adhere to the naming convention: `fuzz_<API Under Test>.py`, where `<API Under Test>` indicates the + functionality targeted by the test. +- Any functionality that involves performing operations on input data is a possible candidate for fuzz testing, but + features that involve processing untrusted user input or parsing operations are typically going to be the most + interesting. +- The goal of these tests is to identify previously unknown or unexpected error cases caused by a given input. 
For that + reason, fuzz tests should gracefully handle anticipated exception cases with a `try`/`except` block to avoid false + positives that halt the fuzzing engine. + +### OSS-Fuzz Scripts (`oss-fuzz-scripts/`) + +Includes scripts for building and integrating fuzz targets with OSS-Fuzz: + +- **`container-environment-bootstrap.sh`** - Sets up the execution environment. It is responsible for fetching default + dictionary entries and ensuring all required build dependencies are installed and up-to-date. +- **`build.sh`** - Executed within the Docker container, this script builds fuzz targets with necessary instrumentation + and prepares seed corpora and dictionaries for use. + +**Where to learn more:** + +- [OSS-Fuzz documentation on the build.sh](https://google.github.io/oss-fuzz/getting-started/new-project-guide/#buildsh) +- [See GitPython's build.sh and Dockerfile in the OSS-Fuzz repository](https://github.com/google/oss-fuzz/tree/master/projects/gitpython) + +### Local Development Helpers (`local-dev-helpers/`) + +Contains tools to make local development tasks easier. +See [the "Running Fuzzers Locally" section below](#running-fuzzers-locally) for further documentation and use cases related to files found here. + +## Running Fuzzers Locally + +> [!WARNING] +> **Some fuzz targets in this repository write to the filesystem** during execution. +> For that reason, it is strongly recommended to **always use Docker when executing fuzz targets**, even when it may be +> possible to do so without it. +> +> Although [I/O operations such as writing to disk are not considered best practice](https://github.com/google/fuzzing/blob/master/docs/good-fuzz-target.md#io), the current implementation of at least one test requires it. +> See [the "Setting Up Your Local Environment" section above](#setting-up-your-local-environment) if you do not already have Docker installed on your machine. +> +> PRs that replace disk I/O with in-memory alternatives are very much welcomed! 
+ +### Direct Execution of Fuzz Targets + +Directly executing fuzz targets allows for quick iteration and testing of changes which can be helpful during early +development of new fuzz targets or for validating changes made to an existing test. +The [Dockerfile](./local-dev-helpers/Dockerfile) located in the `local-dev-helpers/` subdirectory provides a lightweight +container environment preconfigured with [Atheris][atheris-repo] that makes it easy to execute a fuzz target directly. + +**From the root directory of your GitPython repository clone**: + +1. Build the local development helper image: + +```shell +docker build -f fuzzing/local-dev-helpers/Dockerfile -t gitpython-fuzzdev . +``` + +2. Then execute a fuzz target inside the image, for example: + +```shell + docker run -it -v "$PWD":/src gitpython-fuzzdev python fuzzing/fuzz-targets/fuzz_config.py -atheris_runs=10000 +``` + +The above command executes [`fuzz_config.py`](./fuzz-targets/fuzz_config.py) and exits after `10000` runs, or earlier if +the fuzzer finds an error. + +Docker CLI's `-v` flag specifies a volume mount in Docker that maps the directory in which the command is run (which +should be the root directory of your local GitPython clone) to a directory inside the container, so any modifications +made between invocations will be reflected immediately without the need to rebuild the image each time. + +### Running OSS-Fuzz Locally + +This approach uses Docker images provided by OSS-Fuzz for building and running fuzz tests locally. It offers +comprehensive features but requires a local clone of the OSS-Fuzz repository and sufficient disk space for Docker +containers. 
+ +#### Build the Execution Environment + +Clone the OSS-Fuzz repository and prepare the Docker environment: + +```shell +git clone --depth 1 https://github.com/google/oss-fuzz.git oss-fuzz +cd oss-fuzz +python infra/helper.py build_image gitpython +python infra/helper.py build_fuzzers --sanitizer address gitpython +``` + +> [!TIP] +> The `build_fuzzers` command above accepts a local file path pointing to your GitPython repository clone as the last +> argument. +> This makes it easy to build fuzz targets you are developing locally in this repository without changing anything in +> the OSS-Fuzz repo! +> For example, if you have cloned this repository (or a fork of it) into: `~/code/GitPython` +> Then running this command would build new or modified fuzz targets using the `~/code/GitPython/fuzzing/fuzz-targets` +> directory: +> ```shell +> python infra/helper.py build_fuzzers --sanitizer address gitpython ~/code/GitPython +> ``` + +Verify the build of your fuzzers with the optional `check_build` command: + +```shell +python infra/helper.py check_build gitpython +``` + +#### Run a Fuzz Target + +Setting an environment variable for the fuzz target argument of the execution command makes it easier to quickly select +a different target between runs: + +```shell +# specify the fuzz target without the .py extension: +export FUZZ_TARGET=fuzz_config +``` + +Execute the desired fuzz target: + +```shell +python infra/helper.py run_fuzzer gitpython $FUZZ_TARGET -- -max_total_time=60 -print_final_stats=1 +``` + +> [!TIP] +> In the example above, the "`-- -max_total_time=60 -print_final_stats=1`" portion of the command is optional but quite +> useful. +> +> Every argument provided after "`--`" in the above command is passed to the fuzzing engine directly. In this case: +> - `-max_total_time=60` tells the LibFuzzer to stop execution after 60 seconds have elapsed. 
+> - `-print_final_stats=1` tells the LibFuzzer to print a summary of useful metrics about the target run upon
+  completion.
+>
+> But almost any [LibFuzzer option listed in the documentation](https://llvm.org/docs/LibFuzzer.html#options) should
+> work as well.
+
+#### Next Steps
+
+For detailed instructions on advanced features like reproducing OSS-Fuzz issues or using the Fuzz Introspector, refer
+to [the official OSS-Fuzz documentation][oss-fuzz-docs].
+
+## LICENSE
+
+All files located within the `fuzzing/` directory are subject to [the same license](../LICENSE)
+as [the other files in this repository](../README.md#license) with one exception:
+
+[`fuzz_config.py`](./fuzz-targets/fuzz_config.py) was migrated to this repository from the OSS-Fuzz project's repository
+where it was originally created. As such, [`fuzz_config.py`](./fuzz-targets/fuzz_config.py) retains its original license
+and copyright notice (Apache License, Version 2.0 and Copyright 2023 Google LLC respectively) in a header
+comment, followed by a notice stating that it has been modified by contributors to GitPython.
+[LICENSE-APACHE](./LICENSE-APACHE) contains the original license used by the OSS-Fuzz project repository at the time the
+file was migrated. 
+ +[oss-fuzz-repo]: https://github.com/google/oss-fuzz + +[oss-fuzz-docs]: https://google.github.io/oss-fuzz + +[oss-fuzz-issue-tracker]: https://bugs.chromium.org/p/oss-fuzz/issues/list?sort=-opened&can=1&q=proj:gitpython + +[google-fuzzing-repo]: https://github.com/google/fuzzing + +[atheris-repo]: https://github.com/google/atheris diff --git a/fuzzing/fuzz-targets/fuzz_blob.py b/fuzzing/fuzz-targets/fuzz_blob.py new file mode 100644 index 0000000..ce888e8 --- /dev/null +++ b/fuzzing/fuzz-targets/fuzz_blob.py @@ -0,0 +1,40 @@ +import atheris +import sys +import os +import tempfile + +if getattr(sys, "frozen", False) and hasattr(sys, "_MEIPASS"): + path_to_bundled_git_binary = os.path.abspath(os.path.join(os.path.dirname(__file__), "git")) + os.environ["GIT_PYTHON_GIT_EXECUTABLE"] = path_to_bundled_git_binary + +with atheris.instrument_imports(): + import git + + +def TestOneInput(data): + fdp = atheris.FuzzedDataProvider(data) + + with tempfile.TemporaryDirectory() as temp_dir: + repo = git.Repo.init(path=temp_dir) + binsha = fdp.ConsumeBytes(20) + mode = fdp.ConsumeInt(fdp.ConsumeIntInRange(0, fdp.remaining_bytes())) + path = fdp.ConsumeUnicodeNoSurrogates(fdp.remaining_bytes()) + + try: + blob = git.Blob(repo, binsha, mode, path) + except AssertionError as e: + if "Require 20 byte binary sha, got" in str(e): + return -1 + else: + raise e + + _ = blob.mime_type + + +def main(): + atheris.Setup(sys.argv, TestOneInput) + atheris.Fuzz() + + +if __name__ == "__main__": + main() diff --git a/fuzzing/fuzz-targets/fuzz_config.py b/fuzzing/fuzz-targets/fuzz_config.py new file mode 100644 index 0000000..4eddc32 --- /dev/null +++ b/fuzzing/fuzz-targets/fuzz_config.py @@ -0,0 +1,57 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +############################################################################### +# Note: This file has been modified by contributors to GitPython. +# The original state of this file may be referenced here: +# https://github.com/google/oss-fuzz/commit/f26f254558fc48f3c9bc130b10507386b94522da +############################################################################### +import atheris +import sys +import io +import os +from configparser import MissingSectionHeaderError, ParsingError + +if getattr(sys, "frozen", False) and hasattr(sys, "_MEIPASS"): + path_to_bundled_git_binary = os.path.abspath(os.path.join(os.path.dirname(__file__), "git")) + os.environ["GIT_PYTHON_GIT_EXECUTABLE"] = path_to_bundled_git_binary + +with atheris.instrument_imports(): + import git + + +def TestOneInput(data): + sio = io.BytesIO(data) + sio.name = "/tmp/fuzzconfig.config" + git_config = git.GitConfigParser(sio) + try: + git_config.read() + except (MissingSectionHeaderError, ParsingError, UnicodeDecodeError): + return -1 # Reject inputs raising expected exceptions + except ValueError as e: + if "embedded null byte" in str(e): + # The `os.path.expanduser` function, which does not accept strings + # containing null bytes might raise this. 
+ return -1 + else: + raise e # Raise unanticipated exceptions as they might be bugs + + +def main(): + atheris.Setup(sys.argv, TestOneInput) + atheris.Fuzz() + + +if __name__ == "__main__": + main() diff --git a/fuzzing/fuzz-targets/fuzz_diff.py b/fuzzing/fuzz-targets/fuzz_diff.py new file mode 100644 index 0000000..d4bd68b --- /dev/null +++ b/fuzzing/fuzz-targets/fuzz_diff.py @@ -0,0 +1,86 @@ +import sys +import os +import io +import tempfile +from binascii import Error as BinasciiError + +import atheris + +if getattr(sys, "frozen", False) and hasattr(sys, "_MEIPASS"): + path_to_bundled_git_binary = os.path.abspath(os.path.join(os.path.dirname(__file__), "git")) + os.environ["GIT_PYTHON_GIT_EXECUTABLE"] = path_to_bundled_git_binary + +with atheris.instrument_imports(): + from git import Repo, Diff + + +class BytesProcessAdapter: + """Allows bytes to be used as process objects returned by subprocess.Popen.""" + + @atheris.instrument_func + def __init__(self, input_string): + self.stdout = io.BytesIO(input_string) + self.stderr = io.BytesIO() + + @atheris.instrument_func + def wait(self): + return 0 + + poll = wait + + +@atheris.instrument_func +def TestOneInput(data): + fdp = atheris.FuzzedDataProvider(data) + + with tempfile.TemporaryDirectory() as temp_dir: + repo = Repo.init(path=temp_dir) + try: + diff = Diff( + repo, + a_rawpath=fdp.ConsumeBytes(fdp.ConsumeIntInRange(0, fdp.remaining_bytes())), + b_rawpath=fdp.ConsumeBytes(fdp.ConsumeIntInRange(0, fdp.remaining_bytes())), + a_blob_id=fdp.ConsumeBytes(20), + b_blob_id=fdp.ConsumeBytes(20), + a_mode=fdp.ConsumeBytes(fdp.ConsumeIntInRange(0, fdp.remaining_bytes())), + b_mode=fdp.ConsumeBytes(fdp.ConsumeIntInRange(0, fdp.remaining_bytes())), + new_file=fdp.ConsumeBool(), + deleted_file=fdp.ConsumeBool(), + copied_file=fdp.ConsumeBool(), + raw_rename_from=fdp.ConsumeBytes(fdp.ConsumeIntInRange(0, fdp.remaining_bytes())), + raw_rename_to=fdp.ConsumeBytes(fdp.ConsumeIntInRange(0, fdp.remaining_bytes())), + 
diff=fdp.ConsumeBytes(fdp.ConsumeIntInRange(0, fdp.remaining_bytes())), + change_type=fdp.PickValueInList(["A", "D", "C", "M", "R", "T", "U"]), + score=fdp.ConsumeIntInRange(0, fdp.remaining_bytes()), + ) + except BinasciiError: + return -1 + except AssertionError as e: + if "Require 20 byte binary sha, got" in str(e): + return -1 + else: + raise e + + _ = diff.__str__() + _ = diff.a_path + _ = diff.b_path + _ = diff.rename_from + _ = diff.rename_to + _ = diff.renamed_file + + diff_index = diff._index_from_patch_format( + repo, proc=BytesProcessAdapter(fdp.ConsumeBytes(fdp.ConsumeIntInRange(0, fdp.remaining_bytes()))) + ) + + diff._handle_diff_line( + lines_bytes=fdp.ConsumeBytes(fdp.ConsumeIntInRange(0, fdp.remaining_bytes())), repo=repo, index=diff_index + ) + + +def main(): + atheris.Setup(sys.argv, TestOneInput) + atheris.Fuzz() + + +if __name__ == "__main__": + main() diff --git a/fuzzing/fuzz-targets/fuzz_repo.py b/fuzzing/fuzz-targets/fuzz_repo.py new file mode 100644 index 0000000..7bd82c1 --- /dev/null +++ b/fuzzing/fuzz-targets/fuzz_repo.py @@ -0,0 +1,47 @@ +import atheris +import io +import sys +import os +import tempfile + +if getattr(sys, "frozen", False) and hasattr(sys, "_MEIPASS"): + path_to_bundled_git_binary = os.path.abspath(os.path.join(os.path.dirname(__file__), "git")) + os.environ["GIT_PYTHON_GIT_EXECUTABLE"] = path_to_bundled_git_binary + +with atheris.instrument_imports(): + import git + + +def TestOneInput(data): + fdp = atheris.FuzzedDataProvider(data) + + with tempfile.TemporaryDirectory() as temp_dir: + repo = git.Repo.init(path=temp_dir) + + # Generate a minimal set of files based on fuzz data to minimize I/O operations. 
+ file_paths = [os.path.join(temp_dir, f"File{i}") for i in range(min(3, fdp.ConsumeIntInRange(1, 3)))] + for file_path in file_paths: + with open(file_path, "wb") as f: + # The chosen upperbound for count of bytes we consume by writing to these + # files is somewhat arbitrary and may be worth experimenting with if the + # fuzzer coverage plateaus. + f.write(fdp.ConsumeBytes(fdp.ConsumeIntInRange(1, 512))) + + repo.index.add(file_paths) + repo.index.commit(fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(1, 80))) + + fuzz_tree = git.Tree(repo, git.Tree.NULL_BIN_SHA, 0, "") + + try: + fuzz_tree._deserialize(io.BytesIO(data)) + except IndexError: + return -1 + + +def main(): + atheris.Setup(sys.argv, TestOneInput) + atheris.Fuzz() + + +if __name__ == "__main__": + main() diff --git a/fuzzing/fuzz-targets/fuzz_submodule.py b/fuzzing/fuzz-targets/fuzz_submodule.py new file mode 100644 index 0000000..d22b0aa --- /dev/null +++ b/fuzzing/fuzz-targets/fuzz_submodule.py @@ -0,0 +1,100 @@ +import atheris +import sys +import os +import tempfile +from configparser import ParsingError +from utils import ( + setup_git_environment, + handle_exception, + get_max_filename_length, +) + +# Setup the git environment +setup_git_environment() +from git import Repo, GitCommandError, InvalidGitRepositoryError + + +def TestOneInput(data): + fdp = atheris.FuzzedDataProvider(data) + + with tempfile.TemporaryDirectory() as repo_temp_dir: + repo = Repo.init(path=repo_temp_dir) + repo.index.commit("Initial commit") + + try: + with tempfile.TemporaryDirectory() as submodule_temp_dir: + sub_repo = Repo.init(submodule_temp_dir, bare=fdp.ConsumeBool()) + sub_repo.index.commit(fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(1, 512))) + + submodule_name = fdp.ConsumeUnicodeNoSurrogates( + fdp.ConsumeIntInRange(1, max(1, get_max_filename_length(repo.working_tree_dir))) + ) + submodule_path = os.path.join(repo.working_tree_dir, submodule_name) + + submodule = 
repo.create_submodule(submodule_name, submodule_path, url=sub_repo.git_dir) + repo.index.commit("Added submodule") + + with submodule.config_writer() as writer: + key_length = fdp.ConsumeIntInRange(1, max(1, fdp.remaining_bytes())) + value_length = fdp.ConsumeIntInRange(1, max(1, fdp.remaining_bytes())) + + writer.set_value( + fdp.ConsumeUnicodeNoSurrogates(key_length), fdp.ConsumeUnicodeNoSurrogates(value_length) + ) + writer.release() + + submodule.update(init=fdp.ConsumeBool(), dry_run=fdp.ConsumeBool(), force=fdp.ConsumeBool()) + submodule_repo = submodule.module() + + new_file_name = fdp.ConsumeUnicodeNoSurrogates( + fdp.ConsumeIntInRange(1, max(1, get_max_filename_length(submodule_repo.working_tree_dir))) + ) + new_file_path = os.path.join(submodule_repo.working_tree_dir, new_file_name) + with open(new_file_path, "wb") as new_file: + new_file.write(fdp.ConsumeBytes(fdp.ConsumeIntInRange(1, 512))) + submodule_repo.index.add([new_file_path]) + submodule_repo.index.commit("Added new file to submodule") + + repo.submodule_update(recursive=fdp.ConsumeBool()) + submodule_repo.head.reset(commit="HEAD~1", working_tree=fdp.ConsumeBool(), head=fdp.ConsumeBool()) + # Use fdp.PickValueInList to ensure at least one of 'module' or 'configuration' is True + module_option_value, configuration_option_value = fdp.PickValueInList( + [(True, False), (False, True), (True, True)] + ) + submodule.remove( + module=module_option_value, + configuration=configuration_option_value, + dry_run=fdp.ConsumeBool(), + force=fdp.ConsumeBool(), + ) + repo.index.commit(f"Removed submodule {submodule_name}") + + except ( + ParsingError, + GitCommandError, + InvalidGitRepositoryError, + FileNotFoundError, + FileExistsError, + IsADirectoryError, + NotADirectoryError, + BrokenPipeError, + PermissionError, + ): + return -1 + except Exception as e: + if isinstance(e, ValueError) and "embedded null byte" in str(e): + return -1 + elif isinstance(e, OSError) and "File name too long" in str(e): + return 
-1 + else: + return handle_exception(e) + + +def main(): + atheris.instrument_all() + atheris.Setup(sys.argv, TestOneInput) + atheris.Fuzz() + + +if __name__ == "__main__": + main() diff --git a/fuzzing/fuzz-targets/utils.py b/fuzzing/fuzz-targets/utils.py new file mode 100644 index 0000000..97e6eab --- /dev/null +++ b/fuzzing/fuzz-targets/utils.py @@ -0,0 +1,122 @@ +import atheris # pragma: no cover +import os # pragma: no cover +import re # pragma: no cover +import traceback # pragma: no cover +import sys # pragma: no cover +from typing import Set, Tuple, List # pragma: no cover + + +@atheris.instrument_func +def is_expected_exception_message(exception: Exception, error_message_list: List[str]) -> bool: # pragma: no cover + """ + Checks if the message of a given exception matches any of the expected error messages, case-insensitively. + + Args: + exception (Exception): The exception object raised during execution. + error_message_list (List[str]): A list of error message substrings to check against the exception's message. + + Returns: + bool: True if the exception's message contains any of the substrings from the error_message_list, + case-insensitively, otherwise False. + """ + exception_message = str(exception).lower() + for error in error_message_list: + if error.lower() in exception_message: + return True + return False + + +@atheris.instrument_func +def get_max_filename_length(path: str) -> int: # pragma: no cover + """ + Get the maximum filename length for the filesystem containing the given path. + + Args: + path (str): The path to check the filesystem for. + + Returns: + int: The maximum filename length. 
+ """ + return os.pathconf(path, "PC_NAME_MAX") + + +@atheris.instrument_func +def read_lines_from_file(file_path: str) -> list: + """Read lines from a file and return them as a list.""" + try: + with open(file_path, "r") as f: + return [line.strip() for line in f if line.strip()] + except FileNotFoundError: + print(f"File not found: {file_path}") + return [] + except IOError as e: + print(f"Error reading file {file_path}: {e}") + return [] + + +@atheris.instrument_func +def load_exception_list(file_path: str = "explicit-exceptions-list.txt") -> Set[Tuple[str, str]]: + """Load and parse the exception list from a default or specified file.""" + try: + bundle_dir = os.path.dirname(os.path.abspath(__file__)) + full_path = os.path.join(bundle_dir, file_path) + lines = read_lines_from_file(full_path) + exception_list: Set[Tuple[str, str]] = set() + for line in lines: + match = re.match(r"(.+):(\d+):", line) + if match: + file_path: str = match.group(1).strip() + line_number: str = str(match.group(2).strip()) + exception_list.add((file_path, line_number)) + return exception_list + except Exception as e: + print(f"Error loading exception list: {e}") + return set() + + +@atheris.instrument_func +def match_exception_with_traceback(exception_list: Set[Tuple[str, str]], exc_traceback) -> bool: + """Match exception traceback with the entries in the exception list.""" + for filename, lineno, _, _ in traceback.extract_tb(exc_traceback): + for file_pattern, line_pattern in exception_list: + # Ensure filename and line_number are strings for regex matching + if re.fullmatch(file_pattern, filename) and re.fullmatch(line_pattern, str(lineno)): + return True + return False + + +@atheris.instrument_func +def check_exception_against_list(exc_traceback, exception_file: str = "explicit-exceptions-list.txt") -> bool: + """Check if the exception traceback matches any entry in the exception list.""" + exception_list = load_exception_list(exception_file) + return 
match_exception_with_traceback(exception_list, exc_traceback)
+
+
+@atheris.instrument_func
+def handle_exception(e: Exception) -> int:
+    """Encapsulate exception handling logic for reusability."""
+    exc_traceback = e.__traceback__
+    if check_exception_against_list(exc_traceback):
+        return -1
+    else:
+        raise e
+
+
+@atheris.instrument_func
+def setup_git_environment() -> None:
+    """Set up the environment variables for Git."""
+    bundle_dir = os.path.dirname(os.path.abspath(__file__))
+    if getattr(sys, "frozen", False) and hasattr(sys, "_MEIPASS"):  # pragma: no cover
+        bundled_git_binary_path = os.path.join(bundle_dir, "git")
+        os.environ["GIT_PYTHON_GIT_EXECUTABLE"] = bundled_git_binary_path
+
+    if not sys.warnoptions:  # pragma: no cover
+        # The warnings filter below can be overridden by passing the -W option
+        # to the Python interpreter command line or setting the `PYTHONWARNINGS` environment variable.
+        import warnings
+        import logging
+
+        # Fuzzing data causes some modules to generate a large number of warnings
+        # which are not usually interesting and make the test output hard to read, so we ignore them.
+        warnings.simplefilter("ignore")
+        logging.getLogger().setLevel(logging.ERROR)
diff --git a/fuzzing/local-dev-helpers/Dockerfile b/fuzzing/local-dev-helpers/Dockerfile
new file mode 100644
index 0000000..426de05
--- /dev/null
+++ b/fuzzing/local-dev-helpers/Dockerfile
@@ -0,0 +1,22 @@
+# syntax=docker/dockerfile:1
+
+# Use the same Python version as OSS-Fuzz to avoid accidental incompatibilities in test code
+FROM python:3.8-bookworm
+
+LABEL project="GitPython Fuzzing Local Dev Helper"
+
+WORKDIR /src
+
+COPY . .
+
+# Update package managers, install necessary packages, and cleanup unnecessary files in a single RUN to keep the image smaller.
+RUN apt-get update && \
+    apt-get install -y git clang && \
+    python -m pip install --upgrade pip && \
+    python -m pip install atheris && \
+    python -m pip install -e . 
&& \ + apt-get clean && \ + apt-get autoremove -y && \ + rm -rf /var/lib/apt/lists/* + +CMD ["bash"] diff --git a/fuzzing/oss-fuzz-scripts/build.sh b/fuzzing/oss-fuzz-scripts/build.sh new file mode 100644 index 0000000..c156e87 --- /dev/null +++ b/fuzzing/oss-fuzz-scripts/build.sh @@ -0,0 +1,19 @@ +# shellcheck shell=bash +# +# This file is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ + +set -euo pipefail + +python3 -m pip install . + +find "$SRC" -maxdepth 1 \ + \( -name '*_seed_corpus.zip' -o -name '*.options' -o -name '*.dict' \) \ + -exec printf '[%s] Copying: %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" {} \; \ + -exec chmod a-x {} \; \ + -exec cp {} "$OUT" \; + +# Build fuzzers in $OUT. +find "$SRC/gitpython/fuzzing" -name 'fuzz_*.py' -print0 | while IFS= read -r -d '' fuzz_harness; do + compile_python_fuzzer "$fuzz_harness" --add-binary="$(command -v git):." --add-data="$SRC/explicit-exceptions-list.txt:." +done diff --git a/fuzzing/oss-fuzz-scripts/container-environment-bootstrap.sh b/fuzzing/oss-fuzz-scripts/container-environment-bootstrap.sh new file mode 100755 index 0000000..924a3cb --- /dev/null +++ b/fuzzing/oss-fuzz-scripts/container-environment-bootstrap.sh @@ -0,0 +1,108 @@ +#!/usr/bin/env bash +# +# This file is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ + +set -euo pipefail + +################# +# Prerequisites # +################# + +for cmd in python3 git wget zip; do + command -v "$cmd" >/dev/null 2>&1 || { + printf '[%s] Required command %s not found, exiting.\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$cmd" >&2 + exit 1 + } +done + +############# +# Functions # +############# + +download_and_concatenate_common_dictionaries() { + # Assign the first argument as the target file where all contents will be concatenated + local target_file="$1" + + # Shift the arguments so the first argument (target_file path) is removed + # 
and only URLs are left for the loop below. + shift + + for url in "$@"; do + wget -qO- "$url" >>"$target_file" + # Ensure there's a newline between each file's content + echo >>"$target_file" + done +} + +create_seed_corpora_zips() { + local seed_corpora_dir="$1" + local output_zip + for dir in "$seed_corpora_dir"/*; do + if [ -d "$dir" ] && [ -n "$dir" ]; then + output_zip="$SRC/$(basename "$dir")_seed_corpus.zip" + printf '[%s] Zipping the contents of %s into %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$dir" "$output_zip" + zip -jur "$output_zip" "$dir"/* + fi + done +} + +prepare_dictionaries_for_fuzz_targets() { + local dictionaries_dir="$1" + local fuzz_targets_dir="$2" + local common_base_dictionary_filename="$WORK/__base.dict" + + printf '[%s] Copying .dict files from %s to %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$dictionaries_dir" "$SRC/" + cp -v "$dictionaries_dir"/*.dict "$SRC/" + + download_and_concatenate_common_dictionaries "$common_base_dictionary_filename" \ + "https://raw.githubusercontent.com/google/fuzzing/master/dictionaries/utf8.dict" \ + "https://raw.githubusercontent.com/google/fuzzing/master/dictionaries/url.dict" + + find "$fuzz_targets_dir" -name 'fuzz_*.py' -print0 | while IFS= read -r -d '' fuzz_harness; do + if [[ -r "$common_base_dictionary_filename" ]]; then + # Strip the `.py` extension from the filename and replace it with `.dict`. + fuzz_harness_dictionary_filename="$(basename "$fuzz_harness" .py).dict" + local output_file="$SRC/$fuzz_harness_dictionary_filename" + + printf '[%s] Appending %s to %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$common_base_dictionary_filename" "$output_file" + if [[ -s "$output_file" ]]; then + # If a dictionary file for this fuzzer already exists and is not empty, + # we append a new line to the end of it before appending any new entries. 
+ # + # LibFuzzer will happily ignore multiple empty lines in a dictionary but fail with an error + # if any single line has incorrect syntax (e.g., if we accidentally add two entries to the same line.) + # See docs for valid syntax: https://llvm.org/docs/LibFuzzer.html#id32 + echo >>"$output_file" + fi + cat "$common_base_dictionary_filename" >>"$output_file" + fi + done +} + +######################## +# Main execution logic # +######################## +# Seed corpora and dictionaries are hosted in a separate repository to avoid additional bloat in this repo. +# We clone into the $WORK directory because OSS-Fuzz cleans it up after building the image, keeping the image small. +git clone --depth 1 https://github.com/gitpython-developers/qa-assets.git "$WORK/qa-assets" + +create_seed_corpora_zips "$WORK/qa-assets/gitpython/corpora" + +prepare_dictionaries_for_fuzz_targets "$WORK/qa-assets/gitpython/dictionaries" "$SRC/gitpython/fuzzing" + +pushd "$SRC/gitpython/" +# Search for 'raise' and 'assert' statements in Python files within GitPython's source code and submodules, saving the +# matched file path, line number, and line content to a file named 'explicit-exceptions-list.txt'. +# This file can then be used by fuzz harnesses to check exception tracebacks and filter out explicitly raised or otherwise +# anticipated exceptions to reduce false positive test failures. + +git grep -n --recurse-submodules -e '\braise\b' -e '\bassert\b' -- '*.py' -- ':!setup.py' -- ':!test/**' -- ':!fuzzing/**' > "$SRC/explicit-exceptions-list.txt" + +popd + + +# The OSS-Fuzz base image has outdated dependencies by default so we upgrade them below. 
+python3 -m pip install --upgrade pip +# Upgrade to the latest versions known to work at the time the below changes were introduced: +python3 -m pip install 'setuptools~=69.0' 'pyinstaller~=6.0' diff --git a/git/__init__.py b/git/__init__.py index e2d123f..1b2360e 100644 --- a/git/__init__.py +++ b/git/__init__.py @@ -1,92 +1,300 @@ -# __init__.py # Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors # -# This module is part of GitPython and is released under -# the BSD License: https://opensource.org/license/bsd-3-clause/ -# flake8: noqa +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ + # @PydevCodeAnalysisIgnore -from git.exc import * # @NoMove @IgnorePep8 -import inspect -import os -import sys -import os.path as osp -from typing import Optional -from git.types import PathLike +__all__ = [ + "Actor", + "AmbiguousObjectName", + "BadName", + "BadObject", + "BadObjectType", + "BaseIndexEntry", + "Blob", + "BlobFilter", + "BlockingLockFile", + "CacheError", + "CheckoutError", + "CommandError", + "Commit", + "Diff", + "DiffConstants", + "DiffIndex", + "Diffable", + "FetchInfo", + "Git", + "GitCmdObjectDB", + "GitCommandError", + "GitCommandNotFound", + "GitConfigParser", + "GitDB", + "GitError", + "HEAD", + "Head", + "HookExecutionError", + "INDEX", + "IndexEntry", + "IndexFile", + "IndexObject", + "InvalidDBRoot", + "InvalidGitRepositoryError", + "List", # Deprecated - import this from `typing` instead. + "LockFile", + "NULL_TREE", + "NoSuchPathError", + "ODBError", + "Object", + "Optional", # Deprecated - import this from `typing` instead. + "ParseError", + "PathLike", + "PushInfo", + "RefLog", + "RefLogEntry", + "Reference", + "Remote", + "RemoteProgress", + "RemoteReference", + "Repo", + "RepositoryDirtyError", + "RootModule", + "RootUpdateProgress", + "Sequence", # Deprecated - import from `typing`, or `collections.abc` in 3.9+. 
+ "StageType", + "Stats", + "Submodule", + "SymbolicReference", + "TYPE_CHECKING", # Deprecated - import this from `typing` instead. + "Tag", + "TagObject", + "TagReference", + "Tree", + "TreeModifier", + "Tuple", # Deprecated - import this from `typing` instead. + "Union", # Deprecated - import this from `typing` instead. + "UnmergedEntriesError", + "UnsafeOptionError", + "UnsafeProtocolError", + "UnsupportedOperation", + "UpdateProgress", + "WorkTreeRepositoryUnsupported", + "refresh", + "remove_password_if_present", + "rmtree", + "safe_decode", + "to_hex_sha", +] __version__ = "git" +from typing import Any, List, Optional, Sequence, TYPE_CHECKING, Tuple, Union -# { Initialization -def _init_externals() -> None: - """Initialize external projects by putting them into the path""" - if __version__ == "git" and "PYOXIDIZER" not in os.environ: - sys.path.insert(1, osp.join(osp.dirname(__file__), "ext", "gitdb")) - - try: - import gitdb - except ImportError as e: - raise ImportError("'gitdb' could not be found in your PYTHONPATH") from e - # END verify import - - -# } END initialization +if TYPE_CHECKING: + from types import ModuleType +import warnings -################# -_init_externals() -################# +from gitdb.util import to_hex_sha -# { Imports +from git.exc import ( + AmbiguousObjectName, + BadName, + BadObject, + BadObjectType, + CacheError, + CheckoutError, + CommandError, + GitCommandError, + GitCommandNotFound, + GitError, + HookExecutionError, + InvalidDBRoot, + InvalidGitRepositoryError, + NoSuchPathError, + ODBError, + ParseError, + RepositoryDirtyError, + UnmergedEntriesError, + UnsafeOptionError, + UnsafeProtocolError, + UnsupportedOperation, + WorkTreeRepositoryUnsupported, +) +from git.types import PathLike try: - from git.config import GitConfigParser # @NoMove @IgnorePep8 - from git.objects import * # @NoMove @IgnorePep8 - from git.refs import * # @NoMove @IgnorePep8 - from git.diff import * # @NoMove @IgnorePep8 - from git.db import * # 
@NoMove @IgnorePep8 - from git.cmd import Git # @NoMove @IgnorePep8 - from git.repo import Repo # @NoMove @IgnorePep8 - from git.remote import * # @NoMove @IgnorePep8 - from git.index import * # @NoMove @IgnorePep8 - from git.util import ( # @NoMove @IgnorePep8 - LockFile, + from git.compat import safe_decode # @NoMove + from git.config import GitConfigParser # @NoMove + from git.objects import ( # @NoMove + Blob, + Commit, + IndexObject, + Object, + RootModule, + RootUpdateProgress, + Submodule, + TagObject, + Tree, + TreeModifier, + UpdateProgress, + ) + from git.refs import ( # @NoMove + HEAD, + Head, + RefLog, + RefLogEntry, + Reference, + RemoteReference, + SymbolicReference, + Tag, + TagReference, + ) + from git.diff import ( # @NoMove + INDEX, + NULL_TREE, + Diff, + DiffConstants, + DiffIndex, + Diffable, + ) + from git.db import GitCmdObjectDB, GitDB # @NoMove + from git.cmd import Git # @NoMove + from git.repo import Repo # @NoMove + from git.remote import FetchInfo, PushInfo, Remote, RemoteProgress # @NoMove + from git.index import ( # @NoMove + BaseIndexEntry, + BlobFilter, + CheckoutError, + IndexEntry, + IndexFile, + StageType, + # NOTE: This tells type checkers what util resolves to. We delete it, and it is + # really resolved by __getattr__, which warns. See below on what to use instead. + util, + ) + from git.util import ( # @NoMove + Actor, BlockingLockFile, + LockFile, Stats, - Actor, + remove_password_if_present, rmtree, ) except GitError as _exc: raise ImportError("%s: %s" % (_exc.__class__.__name__, _exc)) from _exc -# } END imports -__all__ = [name for name, obj in locals().items() if not (name.startswith("_") or inspect.ismodule(obj))] +def _warned_import(message: str, fullname: str) -> "ModuleType": + import importlib + + warnings.warn(message, DeprecationWarning, stacklevel=3) + return importlib.import_module(fullname) + + +def _getattr(name: str) -> Any: + # TODO: If __version__ is made dynamic and lazily fetched, put that case right here. 
+ if name == "util": + return _warned_import( + "The expression `git.util` and the import `from git import util` actually " + "reference git.index.util, and not the git.util module accessed in " + '`from git.util import XYZ` or `sys.modules["git.util"]`. This potentially ' + "confusing behavior is currently preserved for compatibility, but may be " + "changed in the future and should not be relied on.", + fullname="git.index.util", + ) + + for names, prefix in ( + ({"head", "log", "reference", "symbolic", "tag"}, "git.refs"), + ({"base", "fun", "typ"}, "git.index"), + ): + if name not in names: + continue + + fullname = f"{prefix}.{name}" + + return _warned_import( + f"{__name__}.{name} is a private alias of {fullname} and subject to " + f"immediate removal. Use {fullname} instead.", + fullname=fullname, + ) + + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") + + +if not TYPE_CHECKING: + # NOTE: The expression `git.util` gives git.index.util and `from git import util` + # imports git.index.util, NOT git.util. It may not be feasible to change this until + # the next major version, to avoid breaking code inadvertently relying on it. + # + # - If git.index.util *is* what you want, use (or import from) that, to avoid + # confusion. + # + # - To use the "real" git.util module, write `from git.util import ...`, or if + # necessary access it as `sys.modules["git.util"]`. + # + # Note also that `import git.util` technically imports the "real" git.util... but + # the *expression* `git.util` after doing so is still git.index.util! + # + # (This situation differs from that of other indirect-submodule imports that are + # unambiguously non-public and subject to immediate removal. Here, the public + # git.util module, though different, makes less discoverable that the expression + # `git.util` refers to a non-public attribute of the git module.) + # + # This had originally come about by a wildcard import. 
Now that all intended imports + # are explicit, the intuitive but potentially incompatible binding occurs due to the + # usual rules for Python submodule bindings. So for now we replace that binding with + # git.index.util, delete that, and let __getattr__ handle it and issue a warning. + # + # For the same runtime behavior, it would be enough to forgo importing util, and + # delete util as created naturally; __getattr__ would behave the same. But type + # checkers would not know what util refers to when accessed as an attribute of git. + del util + + # This is "hidden" to preserve static checking for undefined/misspelled attributes. + __getattr__ = _getattr # { Initialize git executable path + GIT_OK = None def refresh(path: Optional[PathLike] = None) -> None: - """Convenience method for setting the git executable path.""" + """Convenience method for setting the git executable path. + + :param path: + Optional path to the Git executable. If not absolute, it is resolved + immediately, relative to the current directory. + + :note: + The `path` parameter is usually omitted and cannot be used to specify a custom + command whose location is looked up in a path search on each call. See + :meth:`Git.refresh <git.cmd.Git.refresh>` for details on how to achieve this. + + :note: + This calls :meth:`Git.refresh <git.cmd.Git.refresh>` and sets other global + configuration according to the effect of doing so. As such, this function should + usually be used instead of using :meth:`Git.refresh <git.cmd.Git.refresh>` or + :meth:`FetchInfo.refresh <git.remote.FetchInfo.refresh>` directly. + + :note: + This function is called automatically, with no arguments, at import time. 
+ """ global GIT_OK GIT_OK = False if not Git.refresh(path=path): return - if not FetchInfo.refresh(): - return # type: ignore [unreachable] + if not FetchInfo.refresh(): # noqa: F405 + return # type: ignore[unreachable] GIT_OK = True -# } END initialize git executable path - - -################# try: refresh() except Exception as _exc: raise ImportError("Failed to initialize: {0}".format(_exc)) from _exc -################# + +# } END initialize git executable path diff --git a/git/cmd.py b/git/cmd.py index 9921dd6..2048a43 100644 --- a/git/cmd.py +++ b/git/cmd.py @@ -1,33 +1,40 @@ -# cmd.py # Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors # -# This module is part of GitPython and is released under -# the BSD License: https://opensource.org/license/bsd-3-clause/ +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ + from __future__ import annotations -import re + +__all__ = ["GitMeta", "Git"] + import contextlib import io +import itertools import logging import os +import re import signal -from subprocess import call, Popen, PIPE, DEVNULL import subprocess -import threading +from subprocess import DEVNULL, PIPE, Popen +import sys from textwrap import dedent - -from git.compat import ( - defenc, - force_bytes, - safe_decode, - is_posix, - is_win, +import threading +import warnings + +from git.compat import defenc, force_bytes, safe_decode +from git.exc import ( + CommandError, + GitCommandError, + GitCommandNotFound, + UnsafeOptionError, + UnsafeProtocolError, ) -from git.exc import CommandError -from git.util import is_cygwin_git, cygpath, expand_path, remove_password_if_present, patch_env - -from .exc import GitCommandError, GitCommandNotFound, UnsafeOptionError, UnsafeProtocolError -from .util import ( - LazyMixin, +from git.util import ( + cygpath, + expand_path, + is_cygwin_git, + patch_env, + remove_password_if_present, stream_copy, ) @@ -43,6 +50,7 @@ 
from typing import ( Iterator, List, Mapping, + Optional, Sequence, TYPE_CHECKING, TextIO, @@ -52,12 +60,11 @@ from typing import ( overload, ) -from git.types import PathLike, Literal, TBD +from git.types import Literal, PathLike, TBD if TYPE_CHECKING: - from git.repo.base import Repo from git.diff import DiffIndex - + from git.repo.base import Repo # --------------------------------------------------------------------------------- @@ -66,10 +73,10 @@ execute_kwargs = { "with_extended_output", "with_exceptions", "as_process", - "stdout_as_string", "output_stream", - "with_stdout", + "stdout_as_string", "kill_after_timeout", + "with_stdout", "universal_newlines", "shell", "env", @@ -77,10 +84,7 @@ execute_kwargs = { "strip_newline_in_stdout", } -log = logging.getLogger(__name__) -log.addHandler(logging.NullHandler()) - -__all__ = ("Git",) +_logger = logging.getLogger(__name__) # ============================================================================== @@ -99,26 +103,39 @@ def handle_process_output( Callable[[bytes, "Repo", "DiffIndex"], None], ], stderr_handler: Union[None, Callable[[AnyStr], None], Callable[[List[AnyStr]], None]], - finalizer: Union[None, Callable[[Union[subprocess.Popen, "Git.AutoInterrupt"]], None]] = None, + finalizer: Union[None, Callable[[Union[Popen, "Git.AutoInterrupt"]], None]] = None, decode_streams: bool = True, kill_after_timeout: Union[None, float] = None, ) -> None: - """Registers for notifications to learn that process output is ready to read, and dispatches lines to - the respective line handlers. - This function returns once the finalizer returns - - :return: result of finalizer - :param process: subprocess.Popen instance - :param stdout_handler: f(stdout_line_string), or None - :param stderr_handler: f(stderr_line_string), or None - :param finalizer: f(proc) - wait for proc to finish + R"""Register for notifications to learn that process output is ready to read, and + dispatch lines to the respective line handlers. 
+ + This function returns once the finalizer returns. + + :param process: + :class:`subprocess.Popen` instance. + + :param stdout_handler: + f(stdout_line_string), or ``None``. + + :param stderr_handler: + f(stderr_line_string), or ``None``. + + :param finalizer: + f(proc) - wait for proc to finish. + :param decode_streams: - Assume stdout/stderr streams are binary and decode them before pushing \ - their contents to handlers. - Set it to False if `universal_newline == True` (then streams are in text-mode) - or if decoding must happen later (i.e. for Diffs). + Assume stdout/stderr streams are binary and decode them before pushing their + contents to handlers. + + This defaults to ``True``. Set it to ``False`` if: + + - ``universal_newlines == True``, as then streams are in text mode, or + - decoding must happen later, such as for :class:`~git.diff.Diff`\s. + :param kill_after_timeout: - float or None, Default = None + :class:`float` or ``None``, Default = ``None`` + To specify a timeout in seconds for the git command, after which the process should be killed. """ @@ -142,9 +159,9 @@ def handle_process_output( handler(line) except Exception as ex: - log.error(f"Pumping {name!r} of cmd({remove_password_if_present(cmdline)}) failed due to: {ex!r}") + _logger.error(f"Pumping {name!r} of cmd({remove_password_if_present(cmdline)}) failed due to: {ex!r}") if "I/O operation on closed file" not in str(ex): - # Only reraise if the error was not due to the stream closing + # Only reraise if the error was not due to the stream closing. 
raise CommandError([f"<{name}-pump>"] + remove_password_if_present(cmdline), ex) from ex finally: stream.close() @@ -155,7 +172,7 @@ def handle_process_output( p_stdout = process.proc.stdout if process.proc else None p_stderr = process.proc.stderr if process.proc else None else: - process = cast(Popen, process) # type: ignore [redundant-cast] + process = cast(Popen, process) # type: ignore[redundant-cast] cmdline = getattr(process, "args", "") p_stdout = process.stdout p_stderr = process.stderr @@ -177,14 +194,13 @@ def handle_process_output( t.start() threads.append(t) - ## FIXME: Why Join?? Will block if `stdin` needs feeding... - # + # FIXME: Why join? Will block if stdin needs feeding... for t in threads: t.join(timeout=kill_after_timeout) if t.is_alive(): if isinstance(process, Git.AutoInterrupt): process._terminate() - else: # Don't want to deal with the other case + else: # Don't want to deal with the other case. raise RuntimeError( "Thread join() timed out in cmd.handle_process_output()." f" kill_after_timeout={kill_after_timeout} seconds" @@ -194,17 +210,85 @@ def handle_process_output( "error: process killed because it timed out." f" kill_after_timeout={kill_after_timeout} seconds" ) if not decode_streams and isinstance(p_stderr, BinaryIO): - # Assume stderr_handler needs binary input + # Assume stderr_handler needs binary input. error_str = cast(str, error_str) error_str = error_str.encode() - # We ignore typing on the next line because mypy does not like - # the way we inferred that stderr takes str or bytes - stderr_handler(error_str) # type: ignore + # We ignore typing on the next line because mypy does not like the way + # we inferred that stderr takes str or bytes. 
+ stderr_handler(error_str) # type: ignore[arg-type] if finalizer: - return finalizer(process) - else: - return None + finalizer(process) + + +safer_popen: Callable[..., Popen] + +if sys.platform == "win32": + + def _safer_popen_windows( + command: Union[str, Sequence[Any]], + *, + shell: bool = False, + env: Optional[Mapping[str, str]] = None, + **kwargs: Any, + ) -> Popen: + """Call :class:`subprocess.Popen` on Windows but don't include a CWD in the + search. + + This avoids an untrusted search path condition where a file like ``git.exe`` in + a malicious repository would be run when GitPython operates on the repository. + The process using GitPython may have an untrusted repository's working tree as + its current working directory. Some operations may temporarily change to that + directory before running a subprocess. In addition, while by default GitPython + does not run external commands with a shell, it can be made to do so, in which + case the CWD of the subprocess, which GitPython usually sets to a repository + working tree, can itself be searched automatically by the shell. This wrapper + covers all those cases. + + :note: + This currently works by setting the + :envvar:`NoDefaultCurrentDirectoryInExePath` environment variable during + subprocess creation. It also takes care of passing Windows-specific process + creation flags, but that is unrelated to path search. + + :note: + The current implementation contains a race condition on :attr:`os.environ`. + GitPython isn't thread-safe, but a program using it on one thread should + ideally be able to mutate :attr:`os.environ` on another, without + unpredictable results. See comments in: + https://github.com/gitpython-developers/GitPython/pull/1650 + """ + # CREATE_NEW_PROCESS_GROUP is needed for some ways of killing it afterwards. 
+ # https://docs.python.org/3/library/subprocess.html#subprocess.Popen.send_signal + # https://docs.python.org/3/library/subprocess.html#subprocess.CREATE_NEW_PROCESS_GROUP + creationflags = subprocess.CREATE_NO_WINDOW | subprocess.CREATE_NEW_PROCESS_GROUP + + # When using a shell, the shell is the direct subprocess, so the variable must + # be set in its environment, to affect its search behavior. + if shell: + # The original may be immutable, or the caller may reuse it. Mutate a copy. + env = {} if env is None else dict(env) + env["NoDefaultCurrentDirectoryInExePath"] = "1" # The "1" can be an value. + + # When not using a shell, the current process does the search in a + # CreateProcessW API call, so the variable must be set in our environment. With + # a shell, that's unnecessary if https://github.com/python/cpython/issues/101283 + # is patched. In Python versions where it is unpatched, and in the rare case the + # ComSpec environment variable is unset, the search for the shell itself is + # unsafe. Setting NoDefaultCurrentDirectoryInExePath in all cases, as done here, + # is simpler and protects against that. (As above, the "1" can be any value.) + with patch_env("NoDefaultCurrentDirectoryInExePath", "1"): + return Popen( + command, + shell=shell, + env=env, + creationflags=creationflags, + **kwargs, + ) + + safer_popen = _safer_popen_windows +else: + safer_popen = Popen def dashify(string: str) -> str: @@ -224,21 +308,80 @@ def dict_to_slots_and__excluded_are_none(self: object, d: Mapping[str, Any], exc ## -- End Utilities -- @} +_USE_SHELL_DEFAULT_MESSAGE = ( + "Git.USE_SHELL is deprecated, because only its default value of False is safe. " + "It will be removed in a future release." +) + +_USE_SHELL_DANGER_MESSAGE = ( + "Setting Git.USE_SHELL to True is unsafe and insecure, as the effect of special " + "shell syntax cannot usually be accounted for. This can result in a command " + "injection vulnerability and arbitrary code execution. 
Git.USE_SHELL is deprecated " + "and will be removed in a future release." +) -# value of Windows process creation flag taken from MSDN -CREATE_NO_WINDOW = 0x08000000 -## CREATE_NEW_PROCESS_GROUP is needed to allow killing it afterwards, -# see https://docs.python.org/3/library/subprocess.html#subprocess.Popen.send_signal -PROC_CREATIONFLAGS = ( - CREATE_NO_WINDOW | subprocess.CREATE_NEW_PROCESS_GROUP if is_win else 0 # type: ignore[attr-defined] -) # mypy error if not windows +def _warn_use_shell(extra_danger: bool) -> None: + warnings.warn( + _USE_SHELL_DANGER_MESSAGE if extra_danger else _USE_SHELL_DEFAULT_MESSAGE, + DeprecationWarning, + stacklevel=3, + ) -class Git(LazyMixin): +class _GitMeta(type): + """Metaclass for :class:`Git`. + This helps issue :class:`DeprecationWarning` if :attr:`Git.USE_SHELL` is used. """ - The Git class manages communication with the Git binary. + + def __getattribute(cls, name: str) -> Any: + if name == "USE_SHELL": + _warn_use_shell(False) + return super().__getattribute__(name) + + def __setattr(cls, name: str, value: Any) -> Any: + if name == "USE_SHELL": + _warn_use_shell(value) + super().__setattr__(name, value) + + if not TYPE_CHECKING: + # To preserve static checking for undefined/misspelled attributes while letting + # the methods' bodies be type-checked, these are defined as non-special methods, + # then bound to special names out of view of static type checkers. (The original + # names invoke name mangling (leading "__") to avoid confusion in other scopes.) + __getattribute__ = __getattribute + __setattr__ = __setattr + + +GitMeta = _GitMeta +"""Alias of :class:`Git`'s metaclass, whether it is :class:`type` or a custom metaclass. + +Whether the :class:`Git` class has the default :class:`type` as its metaclass or uses a +custom metaclass is not documented and may change at any time. This statically checkable +metaclass alias is equivalent at runtime to ``type(Git)``. This should almost never be +used. 
Code that benefits from it is likely to be remain brittle even if it is used. + +In view of the :class:`Git` class's intended use and :class:`Git` objects' dynamic +callable attributes representing git subcommands, it rarely makes sense to inherit from +:class:`Git` at all. Using :class:`Git` in multiple inheritance can be especially tricky +to do correctly. Attempting uses of :class:`Git` where its metaclass is relevant, such +as when a sibling class has an unrelated metaclass and a shared lower bound metaclass +might have to be introduced to solve a metaclass conflict, is not recommended. + +:note: + The correct static type of the :class:`Git` class itself, and any subclasses, is + ``Type[Git]``. (This can be written as ``type[Git]`` in Python 3.9 later.) + + :class:`GitMeta` should never be used in any annotation where ``Type[Git]`` is + intended or otherwise possible to use. This alias is truly only for very rare and + inherently precarious situations where it is necessary to deal with the metaclass + explicitly. +""" + + +class Git(metaclass=_GitMeta): + """The Git class manages communication with the Git binary. It provides a convenient interface to calling the Git binary, such as in:: @@ -246,10 +389,11 @@ class Git(LazyMixin): g.init() # calls 'git init' program rval = g.ls_files() # calls 'git ls-files' program - ``Debugging`` - Set the GIT_PYTHON_TRACE environment variable print each invocation - of the command to stdout. - Set its value to 'full' to see details about the returned values. + Debugging: + + * Set the :envvar:`GIT_PYTHON_TRACE` environment variable to print each invocation + of the command to stdout. + * Set its value to ``full`` to see details about the returned values. 
""" __slots__ = ( @@ -257,14 +401,20 @@ class Git(LazyMixin): "cat_file_all", "cat_file_header", "_version_info", + "_version_info_token", "_git_options", "_persistent_git_options", "_environment", ) - _excluded_ = ("cat_file_all", "cat_file_header", "_version_info") + _excluded_ = ( + "cat_file_all", + "cat_file_header", + "_version_info", + "_version_info_token", + ) - re_unsafe_protocol = re.compile("(.+)::.+") + re_unsafe_protocol = re.compile(r"(.+)::.+") def __getstate__(self) -> Dict[str, Any]: return slots_to_dict(self, exclude=self._excluded_) @@ -274,46 +424,136 @@ class Git(LazyMixin): # CONFIGURATION - git_exec_name = "git" # default that should work on linux and windows + git_exec_name = "git" + """Default git command that should work on Linux, Windows, and other systems.""" - # Enables debugging of GitPython's git commands GIT_PYTHON_TRACE = os.environ.get("GIT_PYTHON_TRACE", False) + """Enables debugging of GitPython's git commands.""" + + USE_SHELL: bool = False + """Deprecated. If set to ``True``, a shell will be used when executing git commands. + + Code that uses ``USE_SHELL = True`` or that passes ``shell=True`` to any GitPython + functions should be updated to use the default value of ``False`` instead. ``True`` + is unsafe unless the effect of syntax treated specially by the shell is fully + considered and accounted for, which is not possible under most circumstances. As + detailed below, it is also no longer needed, even where it had been in the past. + + It is in many if not most cases a command injection vulnerability for an application + to set :attr:`USE_SHELL` to ``True``. Any attacker who can cause a specially crafted + fragment of text to make its way into any part of any argument to any git command + (including paths, branch names, etc.) can cause the shell to read and write + arbitrary files and execute arbitrary commands. Innocent input may also accidentally + contain special shell syntax, leading to inadvertent malfunctions. 
+ + In addition, how a value of ``True`` interacts with some aspects of GitPython's + operation is not precisely specified and may change without warning, even before + GitPython 4.0.0 when :attr:`USE_SHELL` may be removed. This includes: + + * Whether or how GitPython automatically customizes the shell environment. + + * Whether, outside of Windows (where :class:`subprocess.Popen` supports lists of + separate arguments even when ``shell=True``), this can be used with any GitPython + functionality other than direct calls to the :meth:`execute` method. + + * Whether any GitPython feature that runs git commands ever attempts to partially + sanitize data a shell may treat specially. Currently this is not done. + + Prior to GitPython 2.0.8, this had a narrow purpose in suppressing console windows + in graphical Windows applications. In 2.0.8 and higher, it provides no benefit, as + GitPython solves that problem more robustly and safely by using the + ``CREATE_NO_WINDOW`` process creation flag on Windows. + + Because Windows path search differs subtly based on whether a shell is used, in rare + cases changing this from ``True`` to ``False`` may keep an unusual git "executable", + such as a batch file, from being found. To fix this, set the command name or full + path in the :envvar:`GIT_PYTHON_GIT_EXECUTABLE` environment variable or pass the + full path to :func:`git.refresh` (or invoke the script using a ``.exe`` shim). + + Further reading: + + * :meth:`Git.execute` (on the ``shell`` parameter). + * https://github.com/gitpython-developers/GitPython/commit/0d9390866f9ce42870d3116094cd49e0019a970a + * https://learn.microsoft.com/en-us/windows/win32/procthread/process-creation-flags + * https://github.com/python/cpython/issues/91558#issuecomment-1100942950 + * https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-createprocessw + """ - # If True, a shell will be used when executing git commands. 
- # This should only be desirable on Windows, see https://github.com/gitpython-developers/GitPython/pull/126 - # and check `git/test_repo.py:TestRepo.test_untracked_files()` TC for an example where it is required. - # Override this value using `Git.USE_SHELL = True` - USE_SHELL = False - - # Provide the full path to the git executable. Otherwise it assumes git is in the path _git_exec_env_var = "GIT_PYTHON_GIT_EXECUTABLE" _refresh_env_var = "GIT_PYTHON_REFRESH" + GIT_PYTHON_GIT_EXECUTABLE = None - # note that the git executable is actually found during the refresh step in - # the top level __init__ + """Provide the full path to the git executable. Otherwise it assumes git is in the + executable search path. + + :note: + The git executable is actually found during the refresh step in the top level + ``__init__``. It can also be changed by explicitly calling :func:`git.refresh`. + """ + + _refresh_token = object() # Since None would match an initial _version_info_token. @classmethod def refresh(cls, path: Union[None, PathLike] = None) -> bool: - """This gets called by the refresh function (see the top level - __init__). + """Update information about the git executable :class:`Git` objects will use. + + Called by the :func:`git.refresh` function in the top level ``__init__``. + + :param path: + Optional path to the git executable. If not absolute, it is resolved + immediately, relative to the current directory. (See note below.) + + :note: + The top-level :func:`git.refresh` should be preferred because it calls this + method and may also update other state accordingly. + + :note: + There are three different ways to specify the command that refreshing causes + to be used for git: + + 1. Pass no `path` argument and do not set the + :envvar:`GIT_PYTHON_GIT_EXECUTABLE` environment variable. The command + name ``git`` is used. 
It is looked up in a path search by the system, in + each command run (roughly similar to how git is found when running + ``git`` commands manually). This is usually the desired behavior. + + 2. Pass no `path` argument but set the :envvar:`GIT_PYTHON_GIT_EXECUTABLE` + environment variable. The command given as the value of that variable is + used. This may be a simple command or an arbitrary path. It is looked up + in each command run. Setting :envvar:`GIT_PYTHON_GIT_EXECUTABLE` to + ``git`` has the same effect as not setting it. + + 3. Pass a `path` argument. This path, if not absolute, is immediately + resolved, relative to the current directory. This resolution occurs at + the time of the refresh. When git commands are run, they are run using + that previously resolved path. If a `path` argument is passed, the + :envvar:`GIT_PYTHON_GIT_EXECUTABLE` environment variable is not + consulted. + + :note: + Refreshing always sets the :attr:`Git.GIT_PYTHON_GIT_EXECUTABLE` class + attribute, which can be read on the :class:`Git` class or any of its + instances to check what command is used to run git. This attribute should + not be confused with the related :envvar:`GIT_PYTHON_GIT_EXECUTABLE` + environment variable. The class attribute is set no matter how refreshing is + performed. """ - # discern which path to refresh with + # Discern which path to refresh with. if path is not None: new_git = os.path.expanduser(path) new_git = os.path.abspath(new_git) else: new_git = os.environ.get(cls._git_exec_env_var, cls.git_exec_name) - # keep track of the old and new git executable path + # Keep track of the old and new git executable path. 
old_git = cls.GIT_PYTHON_GIT_EXECUTABLE + old_refresh_token = cls._refresh_token cls.GIT_PYTHON_GIT_EXECUTABLE = new_git + cls._refresh_token = object() - # test if the new git executable path is valid - - # - a GitCommandNotFound error is spawned by ourselves - # - a PermissionError is spawned if the git executable provided - # cannot be executed for whatever reason - + # Test if the new git executable path is valid. A GitCommandNotFound error is + # raised by us. A PermissionError is raised if the git executable cannot be + # executed for whatever reason. has_git = False try: cls().version() @@ -321,7 +561,7 @@ class Git(LazyMixin): except (GitCommandNotFound, PermissionError): pass - # warn or raise exception if test failed + # Warn or raise exception if test failed. if not has_git: err = ( dedent( @@ -330,97 +570,92 @@ class Git(LazyMixin): The git executable must be specified in one of the following ways: - be included in your $PATH - be set via $%s - - explicitly set via git.refresh() + - explicitly set via git.refresh(<full-path-to-git-executable>) """ ) % cls._git_exec_env_var ) - # revert to whatever the old_git was + # Revert to whatever the old_git was. cls.GIT_PYTHON_GIT_EXECUTABLE = old_git + cls._refresh_token = old_refresh_token if old_git is None: - # on the first refresh (when GIT_PYTHON_GIT_EXECUTABLE is - # None) we only are quiet, warn, or error depending on the - # GIT_PYTHON_REFRESH value + # On the first refresh (when GIT_PYTHON_GIT_EXECUTABLE is None) we only + # are quiet, warn, or error depending on the GIT_PYTHON_REFRESH value. - # determine what the user wants to happen during the initial - # refresh we expect GIT_PYTHON_REFRESH to either be unset or - # be one of the following values: - # 0|q|quiet|s|silence - # 1|w|warn|warning - # 2|r|raise|e|error + # Determine what the user wants to happen during the initial refresh. 
We + # expect GIT_PYTHON_REFRESH to either be unset or be one of the + # following values: + # + # 0|q|quiet|s|silence|silent|n|none + # 1|w|warn|warning|l|log + # 2|r|raise|e|error|exception mode = os.environ.get(cls._refresh_env_var, "raise").lower() - quiet = ["quiet", "q", "silence", "s", "none", "n", "0"] - warn = ["warn", "w", "warning", "1"] - error = ["error", "e", "raise", "r", "2"] + quiet = ["quiet", "q", "silence", "s", "silent", "none", "n", "0"] + warn = ["warn", "w", "warning", "log", "l", "1"] + error = ["error", "e", "exception", "raise", "r", "2"] if mode in quiet: pass elif mode in warn or mode in error: - err = ( - dedent( - """\ + err = dedent( + """\ %s All git commands will error until this is rectified. - This initial warning can be silenced or aggravated in the future by setting the + This initial message can be silenced or aggravated in the future by setting the $%s environment variable. Use one of the following values: - - %s: for no warning or exception - - %s: for a printed warning + - %s: for no message or exception + - %s: for a warning message (logging level CRITICAL, displayed by default) - %s: for a raised exception Example: export %s=%s """ - ) - % ( - err, - cls._refresh_env_var, - "|".join(quiet), - "|".join(warn), - "|".join(error), - cls._refresh_env_var, - quiet[0], - ) + ) % ( + err, + cls._refresh_env_var, + "|".join(quiet), + "|".join(warn), + "|".join(error), + cls._refresh_env_var, + quiet[0], ) if mode in warn: - print("WARNING: %s" % err) + _logger.critical(err) else: raise ImportError(err) else: - err = ( - dedent( - """\ + err = dedent( + """\ %s environment variable has been set but it has been set with an invalid value. 
Use only the following values: - - %s: for no warning or exception - - %s: for a printed warning + - %s: for no message or exception + - %s: for a warning message (logging level CRITICAL, displayed by default) - %s: for a raised exception """ - ) - % ( - cls._refresh_env_var, - "|".join(quiet), - "|".join(warn), - "|".join(error), - ) + ) % ( + cls._refresh_env_var, + "|".join(quiet), + "|".join(warn), + "|".join(error), ) raise ImportError(err) - # we get here if this was the init refresh and the refresh mode - # was not error, go ahead and set the GIT_PYTHON_GIT_EXECUTABLE - # such that we discern the difference between a first import - # and a second import + # We get here if this was the initial refresh and the refresh mode was + # not error. Go ahead and set the GIT_PYTHON_GIT_EXECUTABLE such that we + # discern the difference between the first refresh at import time + # and subsequent calls to git.refresh or this refresh method. cls.GIT_PYTHON_GIT_EXECUTABLE = cls.git_exec_name else: - # after the first refresh (when GIT_PYTHON_GIT_EXECUTABLE - # is no longer None) we raise an exception - raise GitCommandNotFound("git", err) + # After the first refresh (when GIT_PYTHON_GIT_EXECUTABLE is no longer + # None) we raise an exception. + raise GitCommandNotFound(new_git, err) return has_git @@ -430,28 +665,26 @@ class Git(LazyMixin): @overload @classmethod - def polish_url(cls, url: str, is_cygwin: Literal[False] = ...) -> str: - ... + def polish_url(cls, url: str, is_cygwin: Literal[False] = ...) -> str: ... @overload @classmethod - def polish_url(cls, url: str, is_cygwin: Union[None, bool] = None) -> str: - ... + def polish_url(cls, url: str, is_cygwin: Union[None, bool] = None) -> str: ... @classmethod def polish_url(cls, url: str, is_cygwin: Union[None, bool] = None) -> PathLike: + """Remove any backslashes from URLs to be written in config files. 
+ + Windows might create config files containing paths with backslashes, but git + stops liking them as it will escape the backslashes. Hence we undo the escaping + just to be sure. + """ if is_cygwin is None: is_cygwin = cls.is_cygwin() if is_cygwin: url = cygpath(url) else: - """Remove any backslahes from urls to be written in config files. - - Windows might create config-files containing paths with backslashed, - but git stops liking them as it will escape the backslashes. - Hence we undo the escaping just to be sure. - """ url = os.path.expandvars(url) if url.startswith("~"): url = os.path.expanduser(url) @@ -460,12 +693,11 @@ class Git(LazyMixin): @classmethod def check_unsafe_protocols(cls, url: str) -> None: - """ - Check for unsafe protocols. + """Check for unsafe protocols. - Apart from the usual protocols (http, git, ssh), - Git allows "remote helpers" that have the form `<transport>::<address>`, - one of these helpers (`ext::`) can be used to invoke any arbitrary command. + Apart from the usual protocols (http, git, ssh), Git allows "remote helpers" + that have the form ``<transport>::<address>``. One of these helpers (``ext::``) + can be used to invoke any arbitrary command. See: @@ -481,14 +713,13 @@ class Git(LazyMixin): @classmethod def check_unsafe_options(cls, options: List[str], unsafe_options: List[str]) -> None: - """ - Check for unsafe options. + """Check for unsafe options. - Some options that are passed to `git <command>` can be used to execute - arbitrary commands, this are blocked by default. + Some options that are passed to ``git <command>`` can be used to execute + arbitrary commands. These are blocked by default. """ - # Options can be of the form `foo` or `--foo bar` `--foo=bar`, - # so we need to check if they start with "--foo" or if they are equal to "foo". + # Options can be of the form `foo`, `--foo bar`, or `--foo=bar`, so we need to + # check if they start with "--foo" or if they are equal to "foo". 
bare_unsafe_options = [option.lstrip("-") for option in unsafe_options] for option in options: for unsafe_option, bare_option in zip(unsafe_options, bare_unsafe_options): @@ -497,18 +728,22 @@ class Git(LazyMixin): f"{unsafe_option} is not allowed, use `allow_unsafe_options=True` to allow it." ) - class AutoInterrupt(object): - """Kill/Interrupt the stored process instance once this instance goes out of scope. It is - used to prevent processes piling up in case iterators stop reading. - Besides all attributes are wired through to the contained process object. + class AutoInterrupt: + """Process wrapper that terminates the wrapped process on finalization. + + This kills/interrupts the stored process instance once this instance goes out of + scope. It is used to prevent processes piling up in case iterators stop reading. - The wait method was overridden to perform automatic status code checking - and possibly raise.""" + All attributes are wired through to the contained process object. + + The wait method is overridden to perform automatic status code checking and + possibly raise. + """ __slots__ = ("proc", "args", "status") - # If this is non-zero it will override any status code during - # _terminate, used to prevent race conditions in testing + # If this is non-zero it will override any status code during _terminate, used + # to prevent race conditions in testing. _status_code_if_terminate: int = 0 def __init__(self, proc: Union[None, subprocess.Popen], args: Any) -> None: @@ -517,7 +752,7 @@ class Git(LazyMixin): self.status: Union[int, None] = None def _terminate(self) -> None: - """Terminate the underlying process""" + """Terminate the underlying process.""" if self.proc is None: return @@ -529,36 +764,26 @@ class Git(LazyMixin): proc.stdout.close() if proc.stderr: proc.stderr.close() - # did the process finish already so we have a return code ? + # Did the process finish already so we have a return code? 
try: if proc.poll() is not None: self.status = self._status_code_if_terminate or proc.poll() - return None + return except OSError as ex: - log.info("Ignored error after process had died: %r", ex) + _logger.info("Ignored error after process had died: %r", ex) - # can be that nothing really exists anymore ... + # It can be that nothing really exists anymore... if os is None or getattr(os, "kill", None) is None: - return None + return - # try to kill it + # Try to kill it. try: proc.terminate() - status = proc.wait() # ensure process goes away + status = proc.wait() # Ensure the process goes away. self.status = self._status_code_if_terminate or status except OSError as ex: - log.info("Ignored error after process had died: %r", ex) - except AttributeError: - # try windows - # for some reason, providing None for stdout/stderr still prints something. This is why - # we simply use the shell and redirect to nul. Its slower than CreateProcess, question - # is whether we really want to see all these messages. Its annoying no matter what. - if is_win: - call( - ("TASKKILL /F /T /PID %s 2>nul 1>nul" % str(proc.pid)), - shell=True, - ) + _logger.info("Ignored error after process had died: %r", ex) # END exception handling def __del__(self) -> None: @@ -571,9 +796,16 @@ class Git(LazyMixin): def wait(self, stderr: Union[None, str, bytes] = b"") -> int: """Wait for the process and return its status code. - :param stderr: Previously read value of stderr, in case stderr is already closed. - :warn: may deadlock if output or error pipes are used and not handled separately. - :raise GitCommandError: if the return status is not 0""" + :param stderr: + Previously read value of stderr, in case stderr is already closed. + + :warn: + May deadlock if output or error pipes are used and not handled + separately. + + :raise git.exc.GitCommandError: + If the return status is not 0. 
+ """ if stderr is None: stderr_b = b"" stderr_b = force_bytes(data=stderr, encoding="utf-8") @@ -581,7 +813,7 @@ class Git(LazyMixin): if self.proc is not None: status = self.proc.wait() p_stderr = self.proc.stderr - else: # Assume the underlying proc was killed earlier or never existed + else: # Assume the underlying proc was killed earlier or never existed. status = self.status p_stderr = None @@ -598,30 +830,32 @@ class Git(LazyMixin): if status != 0: errstr = read_all_from_possibly_closed_stream(p_stderr) - log.debug("AutoInterrupt wait stderr: %r" % (errstr,)) + _logger.debug("AutoInterrupt wait stderr: %r" % (errstr,)) raise GitCommandError(remove_password_if_present(self.args), status, errstr) return status # END auto interrupt - class CatFileContentStream(object): - + class CatFileContentStream: """Object representing a sized read-only stream returning the contents of an object. - It behaves like a stream, but counts the data read and simulates an empty + + This behaves like a stream, but counts the data read and simulates an empty stream once our sized content region is empty. - If not all data is read to the end of the objects's lifetime, we read the - rest to assure the underlying stream continues to work""" - __slots__: Tuple[str, ...] = ("_stream", "_nbr", "_size") + If not all data are read to the end of the object's lifetime, we read the + rest to ensure the underlying stream continues to work. + """ + + __slots__ = ("_stream", "_nbr", "_size") def __init__(self, size: int, stream: IO[bytes]) -> None: self._stream = stream self._size = size - self._nbr = 0 # num bytes read + self._nbr = 0 # Number of bytes read. - # special case: if the object is empty, has null bytes, get the - # final newline right away. + # Special case: If the object is empty, has null bytes, get the final + # newline right away. 
if size == 0: stream.read(1) # END handle empty streams @@ -631,16 +865,17 @@ class Git(LazyMixin): if bytes_left == 0: return b"" if size > -1: - # assure we don't try to read past our limit + # Ensure we don't try to read past our limit. size = min(bytes_left, size) else: - # they try to read all, make sure its not more than what remains + # They try to read all, make sure it's not more than what remains. size = bytes_left # END check early depletion data = self._stream.read(size) self._nbr += len(data) - # check for depletion, read our final byte to make the stream usable by others + # Check for depletion, read our final byte to make the stream usable by + # others. if self._size - self._nbr == 0: self._stream.read(1) # final newline # END finish reading @@ -650,7 +885,7 @@ class Git(LazyMixin): if self._nbr == self._size: return b"" - # clamp size to lowest allowed value + # Clamp size to lowest allowed value. bytes_left = self._size - self._nbr if size > -1: size = min(bytes_left, size) @@ -661,7 +896,7 @@ class Git(LazyMixin): data = self._stream.readline(size) self._nbr += len(data) - # handle final byte + # Handle final byte. if self._size - self._nbr == 0: self._stream.read(1) # END finish reading @@ -672,7 +907,7 @@ class Git(LazyMixin): if self._nbr == self._size: return [] - # leave all additional logic to our readline method, we just check the size + # Leave all additional logic to our readline method, we just check the size. out = [] nbr = 0 while True: @@ -704,20 +939,21 @@ class Git(LazyMixin): def __del__(self) -> None: bytes_left = self._size - self._nbr if bytes_left: - # read and discard - seeking is impossible within a stream - # includes terminating newline + # Read and discard - seeking is impossible within a stream. + # This includes any terminating newline. 
self._stream.read(bytes_left + 1) # END handle incomplete read - def __init__(self, working_dir: Union[None, PathLike] = None): + def __init__(self, working_dir: Union[None, PathLike] = None) -> None: """Initialize this instance with: :param working_dir: - Git directory we should work in. If None, we always work in the current - directory as returned by os.getcwd(). - It is meant to be the working tree directory if available, or the - .git directory in case of bare repositories.""" - super(Git, self).__init__() + Git directory we should work in. If ``None``, we always work in the current + directory as returned by :func:`os.getcwd`. + This is meant to be the working tree directory if available, or the + ``.git`` directory in case of bare repositories. + """ + super().__init__() self._working_dir = expand_path(working_dir) self._git_options: Union[List[str], Tuple[str, ...]] = () self._persistent_git_options: List[str] = [] @@ -725,62 +961,82 @@ class Git(LazyMixin): # Extra environment variables to pass to git commands self._environment: Dict[str, str] = {} - # cached command slots + # Cached version slots + self._version_info: Union[Tuple[int, ...], None] = None + self._version_info_token: object = None + + # Cached command slots self.cat_file_header: Union[None, TBD] = None self.cat_file_all: Union[None, TBD] = None + def __getattribute__(self, name: str) -> Any: + if name == "USE_SHELL": + _warn_use_shell(False) + return super().__getattribute__(name) + def __getattr__(self, name: str) -> Any: - """A convenience method as it allows to call the command as if it was - an object. + """A convenience method as it allows to call the command as if it was an object. - :return: Callable object that will execute call _call_process with your arguments.""" - if name[0] == "_": - return LazyMixin.__getattr__(self, name) + :return: + Callable object that will execute call :meth:`_call_process` with your + arguments. 
+ """ + if name.startswith("_"): + return super().__getattribute__(name) return lambda *args, **kwargs: self._call_process(name, *args, **kwargs) def set_persistent_git_options(self, **kwargs: Any) -> None: - """Specify command line options to the git executable - for subsequent subcommand calls + """Specify command line options to the git executable for subsequent + subcommand calls. :param kwargs: - is a dict of keyword arguments. - these arguments are passed as in _call_process - but will be passed to the git command rather than - the subcommand. + A dict of keyword arguments. + These arguments are passed as in :meth:`_call_process`, but will be passed + to the git command rather than the subcommand. """ self._persistent_git_options = self.transform_kwargs(split_single_char_options=True, **kwargs) - def _set_cache_(self, attr: str) -> None: - if attr == "_version_info": - # We only use the first 4 numbers, as everything else could be strings in fact (on windows) - process_version = self._call_process("version") # should be as default *args and **kwargs used - version_numbers = process_version.split(" ")[2] - - self._version_info = cast( - Tuple[int, int, int, int], - tuple(int(n) for n in version_numbers.split(".")[:4] if n.isdigit()), - ) - else: - super(Git, self)._set_cache_(attr) - # END handle version info - @property def working_dir(self) -> Union[None, PathLike]: """:return: Git directory we are working on""" return self._working_dir @property - def version_info(self) -> Tuple[int, int, int, int]: + def version_info(self) -> Tuple[int, ...]: + """ + :return: Tuple with integers representing the major, minor and additional + version numbers as parsed from :manpage:`git-version(1)`. Up to four fields + are used. + + This value is generated on demand and is cached. """ - :return: tuple(int, int, int, int) tuple with integers representing the major, minor - and additional version numbers as parsed from git version. 
- This value is generated on demand and is cached""" + # Refreshing is global, but version_info caching is per-instance. + refresh_token = self._refresh_token # Copy token in case of concurrent refresh. + + # Use the cached version if obtained after the most recent refresh. + if self._version_info_token is refresh_token: + assert self._version_info is not None, "Bug: corrupted token-check state" + return self._version_info + + # Run "git version" and parse it. + process_version = self._call_process("version") + version_string = process_version.split(" ")[2] + version_fields = version_string.split(".")[:4] + leading_numeric_fields = itertools.takewhile(str.isdigit, version_fields) + self._version_info = tuple(map(int, leading_numeric_fields)) + + # This value will be considered valid until the next refresh. + self._version_info_token = refresh_token return self._version_info @overload - def execute(self, command: Union[str, Sequence[Any]], *, as_process: Literal[True]) -> "AutoInterrupt": - ... + def execute( + self, + command: Union[str, Sequence[Any]], + *, + as_process: Literal[True], + ) -> "AutoInterrupt": ... @overload def execute( @@ -789,8 +1045,7 @@ class Git(LazyMixin): *, as_process: Literal[False] = False, stdout_as_string: Literal[True], - ) -> Union[str, Tuple[int, str, str]]: - ... + ) -> Union[str, Tuple[int, str, str]]: ... @overload def execute( @@ -799,8 +1054,7 @@ class Git(LazyMixin): *, as_process: Literal[False] = False, stdout_as_string: Literal[False] = False, - ) -> Union[bytes, Tuple[int, bytes, str]]: - ... + ) -> Union[bytes, Tuple[int, bytes, str]]: ... @overload def execute( @@ -810,8 +1064,7 @@ class Git(LazyMixin): with_extended_output: Literal[False], as_process: Literal[False], stdout_as_string: Literal[True], - ) -> str: - ... + ) -> str: ... @overload def execute( @@ -821,8 +1074,7 @@ class Git(LazyMixin): with_extended_output: Literal[False], as_process: Literal[False], stdout_as_string: Literal[False], - ) -> bytes: - ... 
+ ) -> bytes: ... def execute( self, @@ -842,16 +1094,16 @@ class Git(LazyMixin): strip_newline_in_stdout: bool = True, **subprocess_kwargs: Any, ) -> Union[str, bytes, Tuple[int, Union[str, bytes], str], AutoInterrupt]: - """Handles executing the command on the shell and consumes and returns - the returned information (stdout) + R"""Handle executing the command, and consume and return the returned + information (stdout). :param command: The command argument list to execute. - It should be a string, or a sequence of program arguments. The + It should be a sequence of program arguments, or a string. The program to execute is the first item in the args sequence or string. :param istream: - Standard input filehandle passed to subprocess.Popen. + Standard input filehandle passed to :class:`subprocess.Popen`. :param with_extended_output: Whether to return a (status, stdout, stderr) tuple. @@ -861,79 +1113,102 @@ class Git(LazyMixin): :param as_process: Whether to return the created process instance directly from which - streams can be read on demand. This will render with_extended_output and - with_exceptions ineffective - the caller will have - to deal with the details himself. - It is important to note that the process will be placed into an AutoInterrupt - wrapper that will interrupt the process once it goes out of scope. If you - use the command in iterators, you should pass the whole process instance - instead of a single stream. + streams can be read on demand. This will render `with_extended_output` + and `with_exceptions` ineffective - the caller will have to deal with + the details. It is important to note that the process will be placed + into an :class:`AutoInterrupt` wrapper that will interrupt the process + once it goes out of scope. If you use the command in iterators, you + should pass the whole process instance instead of a single stream. 
:param output_stream: If set to a file-like object, data produced by the git command will be - output to the given stream directly. - This feature only has any effect if as_process is False. Processes will - always be created with a pipe due to issues with subprocess. - This merely is a workaround as data will be copied from the - output pipe to the given output stream directly. - Judging from the implementation, you shouldn't use this flag ! + copied to the given stream instead of being returned as a string. + This feature only has any effect if `as_process` is ``False``. :param stdout_as_string: - if False, the commands standard output will be bytes. Otherwise, it will be - decoded into a string using the default encoding (usually utf-8). + If ``False``, the command's standard output will be bytes. Otherwise, it + will be decoded into a string using the default encoding (usually UTF-8). The latter can fail, if the output contains binary data. + :param kill_after_timeout: + Specifies a timeout in seconds for the git command, after which the process + should be killed. This will have no effect if `as_process` is set to + ``True``. It is set to ``None`` by default and will let the process run + until the timeout is explicitly specified. Uses of this feature should be + carefully considered, due to the following limitations: + + 1. This feature is not supported at all on Windows. + 2. Effectiveness may vary by operating system. ``ps --ppid`` is used to + enumerate child processes, which is available on most GNU/Linux systems + but not most others. + 3. Deeper descendants do not receive signals, though they may sometimes + terminate as a consequence of their parent processes being killed. + 4. `kill_after_timeout` uses ``SIGKILL``, which can have negative side + effects on a repository. For example, stale locks in case of + :manpage:`git-gc(1)` could render the repository incapable of accepting + changes until the lock is manually removed. 
+ + :param with_stdout: + If ``True``, default ``True``, we open stdout on the created process. + + :param universal_newlines: + If ``True``, pipes will be opened as text, and lines are split at all known + line endings. + + :param shell: + Whether to invoke commands through a shell + (see :class:`Popen(..., shell=True) <subprocess.Popen>`). + If this is not ``None``, it overrides :attr:`USE_SHELL`. + + Passing ``shell=True`` to this or any other GitPython function should be + avoided, as it is unsafe under most circumstances. This is because it is + typically not feasible to fully consider and account for the effect of shell + expansions, especially when passing ``shell=True`` to other methods that + forward it to :meth:`Git.execute`. Passing ``shell=True`` is also no longer + needed (nor useful) to work around any known operating system specific + issues. + :param env: - A dictionary of environment variables to be passed to `subprocess.Popen`. + A dictionary of environment variables to be passed to + :class:`subprocess.Popen`. :param max_chunk_size: - Maximum number of bytes in one chunk of data passed to the output_stream in - one invocation of write() method. If the given number is not positive then - the default value is used. + Maximum number of bytes in one chunk of data passed to the `output_stream` + in one invocation of its ``write()`` method. If the given number is not + positive then the default value is used. + + :param strip_newline_in_stdout: + Whether to strip the trailing ``\n`` of the command stdout. :param subprocess_kwargs: - Keyword arguments to be passed to subprocess.Popen. Please note that - some of the valid kwargs are already set by this method, the ones you + Keyword arguments to be passed to :class:`subprocess.Popen`. Please note + that some of the valid kwargs are already set by this method; the ones you specify may not be the same ones. 
- :param with_stdout: If True, default True, we open stdout on the created process - :param universal_newlines: - if True, pipes will be opened as text, and lines are split at - all known line endings. - :param shell: - Whether to invoke commands through a shell (see `Popen(..., shell=True)`). - It overrides :attr:`USE_SHELL` if it is not `None`. - :param kill_after_timeout: - To specify a timeout in seconds for the git command, after which the process - should be killed. This will have no effect if as_process is set to True. It is - set to None by default and will let the process run until the timeout is - explicitly specified. This feature is not supported on Windows. It's also worth - noting that kill_after_timeout uses SIGKILL, which can have negative side - effects on a repository. For example, stale locks in case of git gc could - render the repository incapable of accepting changes until the lock is manually - removed. - :param strip_newline_in_stdout: - Whether to strip the trailing ``\\n`` of the command stdout. :return: - * str(output) if extended_output = False (Default) - * tuple(int(status), str(stdout), str(stderr)) if extended_output = True + * str(output), if `extended_output` is ``False`` (Default) + * tuple(int(status), str(stdout), str(stderr)), + if `extended_output` is ``True`` + + If `output_stream` is ``True``, the stdout value will be your output stream: - if output_stream is True, the stdout value will be your output stream: - * output_stream if extended_output = False - * tuple(int(status), output_stream, str(stderr)) if extended_output = True + * output_stream, if `extended_output` is ``False`` + * tuple(int(status), output_stream, str(stderr)), + if `extended_output` is ``True`` - Note git is executed with LC_MESSAGES="C" to ensure consistent + Note that git is executed with ``LC_MESSAGES="C"`` to ensure consistent output regardless of system language. 
- :raise GitCommandError: + :raise git.exc.GitCommandError: :note: - If you add additional keyword arguments to the signature of this method, - you must update the execute_kwargs tuple housed in this module.""" - # Remove password for the command if present + If you add additional keyword arguments to the signature of this method, you + must update the ``execute_kwargs`` variable housed in this module. + """ + # Remove password for the command if present. redacted_command = remove_password_if_present(command) if self.GIT_PYTHON_TRACE and (self.GIT_PYTHON_TRACE != "full" or as_process): - log.info(" ".join(redacted_command)) + _logger.info(" ".join(redacted_command)) # Allow the user to have the command executed in their working dir. try: @@ -943,12 +1218,12 @@ class Git(LazyMixin): except FileNotFoundError: cwd = None - # Start the process + # Start the process. inline_env = env env = os.environ.copy() - # Attempt to force all output to plain ascii english, which is what some parsing code - # may expect. - # According to stackoverflow (http://goo.gl/l74GC8), we are setting LANGUAGE as well + # Attempt to force all output to plain ASCII English, which is what some parsing + # code may expect. + # According to https://askubuntu.com/a/311796, we are setting LANGUAGE as well # just to be sure. env["LANGUAGE"] = "C" env["LC_ALL"] = "C" @@ -956,117 +1231,121 @@ class Git(LazyMixin): if inline_env is not None: env.update(inline_env) - if is_win: - cmd_not_found_exception = OSError + if sys.platform == "win32": if kill_after_timeout is not None: raise GitCommandError( redacted_command, '"kill_after_timeout" feature is not supported on Windows.', ) - # Only search PATH, not CWD. This must be in the *caller* environment. The "1" can be any value. 
- maybe_patch_caller_env = patch_env("NoDefaultCurrentDirectoryInExePath", "1") + cmd_not_found_exception = OSError else: - cmd_not_found_exception = FileNotFoundError # NOQA # exists, flake8 unknown @UndefinedVariable - maybe_patch_caller_env = contextlib.nullcontext() - # end handle + cmd_not_found_exception = FileNotFoundError + # END handle stdout_sink = PIPE if with_stdout else getattr(subprocess, "DEVNULL", None) or open(os.devnull, "wb") - istream_ok = "None" - if istream: - istream_ok = "<valid stream>" - log.debug( - "Popen(%s, cwd=%s, universal_newlines=%s, shell=%s, istream=%s)", + if shell is None: + # Get the value of USE_SHELL with no deprecation warning. Do this without + # warnings.catch_warnings, to avoid a race condition with application code + # configuring warnings. The value could be looked up in type(self).__dict__ + # or Git.__dict__, but those can break under some circumstances. This works + # the same as self.USE_SHELL in more situations; see Git.__getattribute__. 
+ shell = super().__getattribute__("USE_SHELL") + _logger.debug( + "Popen(%s, cwd=%s, stdin=%s, shell=%s, universal_newlines=%s)", redacted_command, cwd, - universal_newlines, + "<valid stream>" if istream else "None", shell, - istream_ok, + universal_newlines, ) try: - with maybe_patch_caller_env: - proc = Popen( - command, - env=env, - cwd=cwd, - bufsize=-1, - stdin=istream or DEVNULL, - stderr=PIPE, - stdout=stdout_sink, - shell=shell is not None and shell or self.USE_SHELL, - close_fds=is_posix, # unsupported on windows - universal_newlines=universal_newlines, - creationflags=PROC_CREATIONFLAGS, - **subprocess_kwargs, - ) + proc = safer_popen( + command, + env=env, + cwd=cwd, + bufsize=-1, + stdin=(istream or DEVNULL), + stderr=PIPE, + stdout=stdout_sink, + shell=shell, + universal_newlines=universal_newlines, + encoding=defenc if universal_newlines else None, + **subprocess_kwargs, + ) except cmd_not_found_exception as err: raise GitCommandNotFound(redacted_command, err) from err else: - # replace with a typeguard for Popen[bytes]? + # Replace with a typeguard for Popen[bytes]? 
proc.stdout = cast(BinaryIO, proc.stdout) proc.stderr = cast(BinaryIO, proc.stderr) if as_process: return self.AutoInterrupt(proc, command) - def _kill_process(pid: int) -> None: - """Callback method to kill a process.""" - p = Popen( - ["ps", "--ppid", str(pid)], - stdout=PIPE, - creationflags=PROC_CREATIONFLAGS, - ) - child_pids = [] - if p.stdout is not None: - for line in p.stdout: - if len(line.split()) > 0: - local_pid = (line.split())[0] - if local_pid.isdigit(): - child_pids.append(int(local_pid)) - try: - # Windows does not have SIGKILL, so use SIGTERM instead - sig = getattr(signal, "SIGKILL", signal.SIGTERM) - os.kill(pid, sig) - for child_pid in child_pids: - try: - os.kill(child_pid, sig) - except OSError: - pass - kill_check.set() # tell the main routine that the process was killed - except OSError: - # It is possible that the process gets completed in the duration after timeout - # happens and before we try to kill the process. - pass - return - - # end - - if kill_after_timeout is not None: + if sys.platform != "win32" and kill_after_timeout is not None: + # Help mypy figure out this is not None even when used inside communicate(). + timeout = kill_after_timeout + + def kill_process(pid: int) -> None: + """Callback to kill a process. + + This callback implementation would be ineffective and unsafe on Windows. + """ + p = Popen(["ps", "--ppid", str(pid)], stdout=PIPE) + child_pids = [] + if p.stdout is not None: + for line in p.stdout: + if len(line.split()) > 0: + local_pid = (line.split())[0] + if local_pid.isdigit(): + child_pids.append(int(local_pid)) + try: + os.kill(pid, signal.SIGKILL) + for child_pid in child_pids: + try: + os.kill(child_pid, signal.SIGKILL) + except OSError: + pass + # Tell the main routine that the process was killed. + kill_check.set() + except OSError: + # It is possible that the process gets completed in the duration + # after timeout happens and before we try to kill the process. 
+ pass + return + + def communicate() -> Tuple[AnyStr, AnyStr]: + watchdog.start() + out, err = proc.communicate() + watchdog.cancel() + if kill_check.is_set(): + err = 'Timeout: the command "%s" did not complete in %d ' "secs." % ( + " ".join(redacted_command), + timeout, + ) + if not universal_newlines: + err = err.encode(defenc) + return out, err + + # END helpers + kill_check = threading.Event() - watchdog = threading.Timer(kill_after_timeout, _kill_process, args=(proc.pid,)) + watchdog = threading.Timer(timeout, kill_process, args=(proc.pid,)) + else: + communicate = proc.communicate - # Wait for the process to return + # Wait for the process to return. status = 0 stdout_value: Union[str, bytes] = b"" stderr_value: Union[str, bytes] = b"" newline = "\n" if universal_newlines else b"\n" try: if output_stream is None: - if kill_after_timeout is not None: - watchdog.start() - stdout_value, stderr_value = proc.communicate() - if kill_after_timeout is not None: - watchdog.cancel() - if kill_check.is_set(): - stderr_value = 'Timeout: the command "%s" did not complete in %d ' "secs." % ( - " ".join(redacted_command), - kill_after_timeout, - ) - if not universal_newlines: - stderr_value = stderr_value.encode(defenc) - # strip trailing "\n" - if stdout_value.endswith(newline) and strip_newline_in_stdout: # type: ignore + stdout_value, stderr_value = communicate() + # Strip trailing "\n". + if stdout_value.endswith(newline) and strip_newline_in_stdout: # type: ignore[arg-type] stdout_value = stdout_value[:-1] - if stderr_value.endswith(newline): # type: ignore + if stderr_value.endswith(newline): # type: ignore[arg-type] stderr_value = stderr_value[:-1] status = proc.returncode @@ -1075,8 +1354,8 @@ class Git(LazyMixin): stream_copy(proc.stdout, output_stream, max_chunk_size) stdout_value = proc.stdout.read() stderr_value = proc.stderr.read() - # strip trailing "\n" - if stderr_value.endswith(newline): # type: ignore + # Strip trailing "\n". 
+ if stderr_value.endswith(newline): # type: ignore[arg-type] stderr_value = stderr_value[:-1] status = proc.wait() # END stdout handling @@ -1090,10 +1369,10 @@ class Git(LazyMixin): def as_text(stdout_value: Union[bytes, str]) -> str: return not output_stream and safe_decode(stdout_value) or "<OUTPUT_STREAM>" - # end + # END as_text if stderr_value: - log.info( + _logger.info( "%s -> %d; stdout: '%s'; stderr: '%s'", cmdstr, status, @@ -1101,18 +1380,18 @@ class Git(LazyMixin): safe_decode(stderr_value), ) elif stdout_value: - log.info("%s -> %d; stdout: '%s'", cmdstr, status, as_text(stdout_value)) + _logger.info("%s -> %d; stdout: '%s'", cmdstr, status, as_text(stdout_value)) else: - log.info("%s -> %d", cmdstr, status) + _logger.info("%s -> %d", cmdstr, status) # END handle debug printing if with_exceptions and status != 0: raise GitCommandError(redacted_command, status, stderr_value, stdout_value) - if isinstance(stdout_value, bytes) and stdout_as_string: # could also be output_stream + if isinstance(stdout_value, bytes) and stdout_as_string: # Could also be output_stream. stdout_value = safe_decode(stdout_value) - # Allow access to the command's status code + # Allow access to the command's status code. if with_extended_output: return (status, stdout_value, safe_decode(stderr_value)) else: @@ -1122,26 +1401,28 @@ class Git(LazyMixin): return self._environment def update_environment(self, **kwargs: Any) -> Dict[str, Union[str, None]]: - """ - Set environment variables for future git invocations. Return all changed - values in a format that can be passed back into this function to revert - the changes: + """Set environment variables for future git invocations. Return all changed + values in a format that can be passed back into this function to revert the + changes. 
- ``Examples``:: + Examples:: old_env = self.update_environment(PWD='/tmp') self.update_environment(**old_env) - :param kwargs: environment variables to use for git processes - :return: dict that maps environment variables to their old values + :param kwargs: + Environment variables to use for git processes. + + :return: + Dict that maps environment variables to their old values """ old_env = {} for key, value in kwargs.items(): - # set value if it is None + # Set value if it is None. if value is not None: old_env[key] = self._environment.get(key) self._environment[key] = value - # remove key from environment if its value is None + # Remove key from environment if its value is None. elif key in self._environment: old_env[key] = self._environment[key] del self._environment[key] @@ -1149,16 +1430,16 @@ class Git(LazyMixin): @contextlib.contextmanager def custom_environment(self, **kwargs: Any) -> Iterator[None]: - """ - A context manager around the above ``update_environment`` method to restore the - environment back to its previous state after operation. + """A context manager around the above :meth:`update_environment` method to + restore the environment back to its previous state after operation. - ``Examples``:: + Examples:: with self.custom_environment(GIT_SSH='/bin/ssh_wrapper'): repo.remotes.origin.fetch() - :param kwargs: see update_environment + :param kwargs: + See :meth:`update_environment`. 
""" old_env = self.update_environment(**kwargs) try: @@ -1183,7 +1464,7 @@ class Git(LazyMixin): return [] def transform_kwargs(self, split_single_char_options: bool = True, **kwargs: Any) -> List[str]: - """Transforms Python style kwargs into git command line options.""" + """Transform Python-style kwargs into git command line options.""" args = [] for k, v in kwargs.items(): if isinstance(v, (list, tuple)): @@ -1205,23 +1486,24 @@ class Git(LazyMixin): return outlist def __call__(self, **kwargs: Any) -> "Git": - """Specify command line options to the git executable - for a subcommand call + """Specify command line options to the git executable for a subcommand call. :param kwargs: - is a dict of keyword arguments. - these arguments are passed as in _call_process - but will be passed to the git command rather than - the subcommand. + A dict of keyword arguments. + These arguments are passed as in :meth:`_call_process`, but will be passed + to the git command rather than the subcommand. + + Examples:: - ``Examples``:: - git(work_tree='/tmp').difftool()""" + git(work_tree='/tmp').difftool() + """ self._git_options = self.transform_kwargs(split_single_char_options=True, **kwargs) return self @overload - def _call_process(self, method: str, *args: None, **kwargs: None) -> str: - ... # if no args given, execute called with all defaults + def _call_process( + self, method: str, *args: None, **kwargs: None + ) -> str: ... # If no args were given, execute the call with all defaults. @overload def _call_process( @@ -1231,36 +1513,37 @@ class Git(LazyMixin): as_process: Literal[True], *args: Any, **kwargs: Any, - ) -> "Git.AutoInterrupt": - ... + ) -> "Git.AutoInterrupt": ... @overload def _call_process( self, method: str, *args: Any, **kwargs: Any - ) -> Union[str, bytes, Tuple[int, Union[str, bytes], str], "Git.AutoInterrupt"]: - ... + ) -> Union[str, bytes, Tuple[int, Union[str, bytes], str], "Git.AutoInterrupt"]: ... 
def _call_process( self, method: str, *args: Any, **kwargs: Any ) -> Union[str, bytes, Tuple[int, Union[str, bytes], str], "Git.AutoInterrupt"]: - """Run the given git command with the specified arguments and return - the result as a String + """Run the given git command with the specified arguments and return the result + as a string. :param method: - is the command. Contained "_" characters will be converted to dashes, - such as in 'ls_files' to call 'ls-files'. + The command. Contained ``_`` characters will be converted to hyphens, such + as in ``ls_files`` to call ``ls-files``. :param args: - is the list of arguments. If None is included, it will be pruned. - This allows your commands to call git more conveniently as None - is realized as non-existent + The list of arguments. If ``None`` is included, it will be pruned. + This allows your commands to call git more conveniently, as ``None`` is + realized as non-existent. :param kwargs: - It contains key-values for the following: - - the :meth:`execute()` kwds, as listed in :var:`execute_kwargs`; - - "command options" to be converted by :meth:`transform_kwargs()`; - - the `'insert_kwargs_after'` key which its value must match one of ``*args`` - and any cmd-options will be appended after the matched arg. + Contains key-values for the following: + + - The :meth:`execute()` kwds, as listed in ``execute_kwargs``. + - "Command options" to be converted by :meth:`transform_kwargs`. + - The ``insert_kwargs_after`` key which its value must match one of + ``*args``. + + It also contains any command options, to be appended after the matched arg. Examples:: @@ -1268,19 +1551,21 @@ class Git(LazyMixin): turns into:: - git rev-list max-count 10 --header master + git rev-list max-count 10 --header master - :return: Same as ``execute`` - if no args given used execute default (esp. 
as_process = False, stdout_as_string = True) - and return str""" - # Handle optional arguments prior to calling transform_kwargs - # otherwise these'll end up in args, which is bad. + :return: + Same as :meth:`execute`. If no args are given, used :meth:`execute`'s + default (especially ``as_process = False``, ``stdout_as_string = True``) and + return :class:`str`. + """ + # Handle optional arguments prior to calling transform_kwargs. + # Otherwise these'll end up in args, which is bad. exec_kwargs = {k: v for k, v in kwargs.items() if k in execute_kwargs} opts_kwargs = {k: v for k, v in kwargs.items() if k not in execute_kwargs} insert_after_this_arg = opts_kwargs.pop("insert_kwargs_after", None) - # Prepare the argument list + # Prepare the argument list. opt_args = self.transform_kwargs(**opts_kwargs) ext_args = self._unpack_args([a for a in args if a is not None]) @@ -1295,17 +1580,16 @@ class Git(LazyMixin): "Couldn't find argument '%s' in args %s to insert cmd options after" % (insert_after_this_arg, str(ext_args)) ) from err - # end handle error + # END handle error args_list = ext_args[: index + 1] + opt_args + ext_args[index + 1 :] - # end handle opts_kwargs + # END handle opts_kwargs call = [self.GIT_PYTHON_GIT_EXECUTABLE] - # add persistent git options + # Add persistent git options. call.extend(self._persistent_git_options) - # add the git options, then reset to empty - # to avoid side_effects + # Add the git options, then reset to empty to avoid side effects. 
call.extend(self._git_options) self._git_options = () @@ -1317,16 +1601,25 @@ class Git(LazyMixin): def _parse_object_header(self, header_line: str) -> Tuple[str, str, int]: """ :param header_line: - <hex_sha> type_string size_as_int + A line of the form:: - :return: (hex_sha, type_string, size_as_int) + <hex_sha> type_string size_as_int - :raise ValueError: if the header contains indication for an error due to - incorrect input sha""" + :return: + (hex_sha, type_string, size_as_int) + + :raise ValueError: + If the header contains indication for an error due to incorrect input sha. + """ tokens = header_line.split() if len(tokens) != 3: if not tokens: - raise ValueError("SHA could not be resolved, git returned: %r" % (header_line.strip())) + err_msg = ( + f"SHA is empty, possible dubious ownership in the repository " + f"""at {self._working_dir}.\n If this is unintended run:\n\n """ + f""" "git config --global --add safe.directory {self._working_dir}" """ + ) + raise ValueError(err_msg) else: raise ValueError("SHA %s could not be resolved, git returned: %r" % (tokens[0], header_line.strip())) # END handle actual return value @@ -1337,12 +1630,12 @@ class Git(LazyMixin): return (tokens[0], tokens[1], int(tokens[2])) def _prepare_ref(self, ref: AnyStr) -> bytes: - # required for command to separate refs on stdin, as bytes + # Required for command to separate refs on stdin, as bytes. if isinstance(ref, bytes): - # Assume 40 bytes hexsha - bin-to-ascii for some reason returns bytes, not text + # Assume 40 bytes hexsha - bin-to-ascii for some reason returns bytes, not text. refstr: str = ref.decode("ascii") elif not isinstance(ref, str): - refstr = str(ref) # could be ref-object + refstr = str(ref) # Could be ref-object. 
else: refstr = ref @@ -1372,31 +1665,43 @@ class Git(LazyMixin): raise ValueError("cmd stdin was empty") def get_object_header(self, ref: str) -> Tuple[str, str, int]: - """Use this method to quickly examine the type and size of the object behind - the given ref. + """Use this method to quickly examine the type and size of the object behind the + given ref. - :note: The method will only suffer from the costs of command invocation - once and reuses the command in subsequent calls. + :note: + The method will only suffer from the costs of command invocation once and + reuses the command in subsequent calls. - :return: (hexsha, type_string, size_as_int)""" + :return: + (hexsha, type_string, size_as_int) + """ cmd = self._get_persistent_cmd("cat_file_header", "cat_file", batch_check=True) return self.__get_object_header(cmd, ref) def get_object_data(self, ref: str) -> Tuple[str, str, int, bytes]: - """As get_object_header, but returns object data as well + """Similar to :meth:`get_object_header`, but returns object data as well. + + :return: + (hexsha, type_string, size_as_int, data_string) - :return: (hexsha, type_string, size_as_int, data_string) - :note: not threadsafe""" + :note: + Not threadsafe. + """ hexsha, typename, size, stream = self.stream_object_data(ref) data = stream.read(size) del stream return (hexsha, typename, size, data) def stream_object_data(self, ref: str) -> Tuple[str, str, int, "Git.CatFileContentStream"]: - """As get_object_header, but returns the data as a stream + """Similar to :meth:`get_object_data`, but returns the data as a stream. - :return: (hexsha, type_string, size_as_int, stream) - :note: This method is not threadsafe, you need one independent Command instance per thread to be safe !""" + :return: + (hexsha, type_string, size_as_int, stream) + + :note: + This method is not threadsafe. You need one independent :class:`Git` + instance per thread to be safe! 
+ """ cmd = self._get_persistent_cmd("cat_file_all", "cat_file", batch=True) hexsha, typename, size = self.__get_object_header(cmd, ref) cmd_stdout = cmd.stdout if cmd.stdout is not None else io.BytesIO() @@ -1407,7 +1712,9 @@ class Git(LazyMixin): Currently persistent commands will be interrupted. - :return: self""" + :return: + self + """ for cmd in (self.cat_file_all, self.cat_file_header): if cmd: cmd.__del__() diff --git a/git/compat.py b/git/compat.py index 624f261..d7d9a55 100644 --- a/git/compat.py +++ b/git/compat.py @@ -1,31 +1,34 @@ -# -*- coding: utf-8 -*- -# config.py # Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors # -# This module is part of GitPython and is released under -# the BSD License: https://opensource.org/license/bsd-3-clause/ -"""utilities to help provide compatibility with python 3""" -# flake8: noqa +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ + +"""Utilities to help provide compatibility with Python 3. + +This module exists for historical reasons. Code outside GitPython may make use of public +members of this module, but is unlikely to benefit from doing so. GitPython continues to +use some of these utilities, in some cases for compatibility across different platforms. 
+""" import locale import os import sys +import warnings -from gitdb.utils.encoding import ( - force_bytes, # @UnusedImport - force_text, # @UnusedImport -) +from gitdb.utils.encoding import force_bytes, force_text # noqa: F401 # typing -------------------------------------------------------------------- from typing import ( - Any, + Any, # noqa: F401 AnyStr, - Dict, - IO, + Dict, # noqa: F401 + IO, # noqa: F401 + List, Optional, - Tuple, - Type, + TYPE_CHECKING, + Tuple, # noqa: F401 + Type, # noqa: F401 Union, overload, ) @@ -33,24 +36,86 @@ from typing import ( # --------------------------------------------------------------------------- -is_win: bool = os.name == "nt" -is_posix = os.name == "posix" -is_darwin = os.name == "darwin" +_deprecated_platform_aliases = { + "is_win": os.name == "nt", + "is_posix": os.name == "posix", + "is_darwin": sys.platform == "darwin", +} + + +def _getattr(name: str) -> Any: + try: + value = _deprecated_platform_aliases[name] + except KeyError: + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") from None + + warnings.warn( + f"{__name__}.{name} and other is_<platform> aliases are deprecated. " + "Write the desired os.name or sys.platform check explicitly instead.", + DeprecationWarning, + stacklevel=2, + ) + return value + + +if not TYPE_CHECKING: # Preserve static checking for undefined/misspelled attributes. + __getattr__ = _getattr + + +def __dir__() -> List[str]: + return [*globals(), *_deprecated_platform_aliases] + + +is_win: bool +"""Deprecated alias for ``os.name == "nt"`` to check for native Windows. + +This is deprecated because it is clearer to write out :attr:`os.name` or +:attr:`sys.platform` checks explicitly, especially in cases where it matters which is +used. + +:note: + ``is_win`` is ``False`` on Cygwin, but is often wrongly assumed ``True``. To detect + Cygwin, use ``sys.platform == "cygwin"``. 
+""" + +is_posix: bool +"""Deprecated alias for ``os.name == "posix"`` to check for Unix-like ("POSIX") systems. + +This is deprecated because it clearer to write out :attr:`os.name` or +:attr:`sys.platform` checks explicitly, especially in cases where it matters which is +used. + +:note: + For POSIX systems, more detailed information is available in :attr:`sys.platform`, + while :attr:`os.name` is always ``"posix"`` on such systems, including macOS + (Darwin). +""" + +is_darwin: bool +"""Deprecated alias for ``sys.platform == "darwin"`` to check for macOS (Darwin). + +This is deprecated because it clearer to write out :attr:`os.name` or +:attr:`sys.platform` checks explicitly. + +:note: + For macOS (Darwin), ``os.name == "posix"`` as in other Unix-like systems, while + ``sys.platform == "darwin"``. +""" + defenc = sys.getfilesystemencoding() +"""The encoding used to convert between Unicode and bytes filenames.""" @overload -def safe_decode(s: None) -> None: - ... +def safe_decode(s: None) -> None: ... @overload -def safe_decode(s: AnyStr) -> str: - ... +def safe_decode(s: AnyStr) -> str: ... def safe_decode(s: Union[AnyStr, None]) -> Optional[str]: - """Safely decodes a binary string to unicode""" + """Safely decode a binary string to Unicode.""" if isinstance(s, str): return s elif isinstance(s, bytes): @@ -62,17 +127,15 @@ def safe_decode(s: Union[AnyStr, None]) -> Optional[str]: @overload -def safe_encode(s: None) -> None: - ... +def safe_encode(s: None) -> None: ... @overload -def safe_encode(s: AnyStr) -> bytes: - ... +def safe_encode(s: AnyStr) -> bytes: ... def safe_encode(s: Optional[AnyStr]) -> Optional[bytes]: - """Safely encodes a binary string to unicode""" + """Safely encode a binary string to Unicode.""" if isinstance(s, str): return s.encode(defenc) elif isinstance(s, bytes): @@ -84,17 +147,15 @@ def safe_encode(s: Optional[AnyStr]) -> Optional[bytes]: @overload -def win_encode(s: None) -> None: - ... +def win_encode(s: None) -> None: ... 
@overload -def win_encode(s: AnyStr) -> bytes: - ... +def win_encode(s: AnyStr) -> bytes: ... def win_encode(s: Optional[AnyStr]) -> Optional[bytes]: - """Encode unicodes for process arguments on Windows.""" + """Encode Unicode strings for process arguments on Windows.""" if isinstance(s, str): return s.encode(locale.getpreferredencoding(False)) elif isinstance(s, bytes): diff --git a/git/config.py b/git/config.py index 76b1491..de35083 100644 --- a/git/config.py +++ b/git/config.py @@ -1,33 +1,27 @@ -# config.py # Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors # -# This module is part of GitPython and is released under -# the BSD License: https://opensource.org/license/bsd-3-clause/ -"""Module containing module parser implementation able to properly read and write -configuration files""" +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ + +"""Parser for reading and writing configuration files.""" + +__all__ = ["GitConfigParser", "SectionConstraint"] -import sys import abc +import configparser as cp +import fnmatch from functools import wraps import inspect from io import BufferedReader, IOBase import logging import os +import os.path as osp import re -import fnmatch - -from git.compat import ( - defenc, - force_text, - is_win, -) +import sys +from git.compat import defenc, force_text from git.util import LockFile -import os.path as osp - -import configparser as cp - # typing------------------------------------------------------- from typing import ( @@ -48,14 +42,15 @@ from typing import ( from git.types import Lit_config_levels, ConfigLevels_Tup, PathLike, assert_never, _T if TYPE_CHECKING: - from git.repo.base import Repo from io import BytesIO + from git.repo.base import Repo + T_ConfigParser = TypeVar("T_ConfigParser", bound="GitConfigParser") T_OMD_value = TypeVar("T_OMD_value", str, bytes, int, float, bool) if sys.version_info[:3] < (3, 7, 2): - # 
typing.Ordereddict not added until py 3.7.2 + # typing.Ordereddict not added until Python 3.7.2. from collections import OrderedDict OrderedDict_OMD = OrderedDict @@ -66,31 +61,27 @@ else: # ------------------------------------------------------------- -__all__ = ("GitConfigParser", "SectionConstraint") - - -log = logging.getLogger("git.config") -log.addHandler(logging.NullHandler()) - -# invariants -# represents the configuration level of a configuration file - +_logger = logging.getLogger(__name__) CONFIG_LEVELS: ConfigLevels_Tup = ("system", "user", "global", "repository") +"""The configuration level of a configuration file.""" - -# Section pattern to detect conditional includes. -# https://git-scm.com/docs/git-config#_conditional_includes CONDITIONAL_INCLUDE_REGEXP = re.compile(r"(?<=includeIf )\"(gitdir|gitdir/i|onbranch):(.+)\"") +"""Section pattern to detect conditional includes. + +See: https://git-scm.com/docs/git-config#_conditional_includes +""" class MetaParserBuilder(abc.ABCMeta): # noqa: B024 - """Utility class wrapping base-class methods into decorators that assure read-only properties""" + """Utility class wrapping base-class methods into decorators that assure read-only + properties.""" def __new__(cls, name: str, bases: Tuple, clsdict: Dict[str, Any]) -> "MetaParserBuilder": + """Equip all base-class methods with a needs_values decorator, and all non-const + methods with a :func:`set_dirty_and_flush_changes` decorator in addition to + that. 
""" - Equip all base-class methods with a needs_values decorator, and all non-const methods - with a set_dirty_and_flush_changes decorator in addition to that.""" kmm = "_mutating_methods_" if kmm in clsdict: mutating_methods = clsdict[kmm] @@ -109,12 +100,13 @@ class MetaParserBuilder(abc.ABCMeta): # noqa: B024 # END for each base # END if mutating methods configuration is set - new_type = super(MetaParserBuilder, cls).__new__(cls, name, bases, clsdict) + new_type = super().__new__(cls, name, bases, clsdict) return new_type def needs_values(func: Callable[..., _T]) -> Callable[..., _T]: - """Returns method assuring we read values (on demand) before we try to access them""" + """Return a method for ensuring we read values (on demand) before we try to access + them.""" @wraps(func) def assure_data_present(self: "GitConfigParser", *args: Any, **kwargs: Any) -> _T: @@ -126,9 +118,11 @@ def needs_values(func: Callable[..., _T]) -> Callable[..., _T]: def set_dirty_and_flush_changes(non_const_func: Callable[..., _T]) -> Callable[..., _T]: - """Return method that checks whether given non constant function may be called. - If so, the instance will be set dirty. - Additionally, we flush the changes right to disk""" + """Return a method that checks whether given non constant function may be called. + + If so, the instance will be set dirty. Additionally, we flush the changes right to + disk. + """ def flush_changes(self: "GitConfigParser", *args: Any, **kwargs: Any) -> _T: rval = non_const_func(self, *args, **kwargs) @@ -142,16 +136,17 @@ def set_dirty_and_flush_changes(non_const_func: Callable[..., _T]) -> Callable[. class SectionConstraint(Generic[T_ConfigParser]): - """Constrains a ConfigParser to only option commands which are constrained to always use the section we have been initialized with. It supports all ConfigParser methods that operate on an option. 
:note: - If used as a context manager, will release the wrapped ConfigParser.""" + If used as a context manager, will release the wrapped ConfigParser. + """ __slots__ = ("_config", "_section_name") + _valid_attrs_ = ( "get_value", "set_value", @@ -179,20 +174,21 @@ class SectionConstraint(Generic[T_ConfigParser]): def __getattr__(self, attr: str) -> Any: if attr in self._valid_attrs_: return lambda *args, **kwargs: self._call_config(attr, *args, **kwargs) - return super(SectionConstraint, self).__getattribute__(attr) + return super().__getattribute__(attr) def _call_config(self, method: str, *args: Any, **kwargs: Any) -> Any: - """Call the configuration at the given method which must take a section name - as first argument""" + """Call the configuration at the given method which must take a section name as + first argument.""" return getattr(self._config, method)(self._section_name, *args, **kwargs) @property def config(self) -> T_ConfigParser: - """return: Configparser instance we constrain""" + """return: ConfigParser instance we constrain""" return self._config def release(self) -> None: - """Equivalent to GitConfigParser.release(), which is called on our underlying parser instance""" + """Equivalent to :meth:`GitConfigParser.release`, which is called on our + underlying parser instance.""" return self._config.release() def __enter__(self) -> "SectionConstraint[T_ConfigParser]": @@ -207,36 +203,37 @@ class _OMD(OrderedDict_OMD): """Ordered multi-dict.""" def __setitem__(self, key: str, value: _T) -> None: - super(_OMD, self).__setitem__(key, [value]) + super().__setitem__(key, [value]) def add(self, key: str, value: Any) -> None: if key not in self: - super(_OMD, self).__setitem__(key, [value]) - return None - super(_OMD, self).__getitem__(key).append(value) + super().__setitem__(key, [value]) + return + + super().__getitem__(key).append(value) def setall(self, key: str, values: List[_T]) -> None: - super(_OMD, self).__setitem__(key, values) + 
super().__setitem__(key, values) def __getitem__(self, key: str) -> Any: - return super(_OMD, self).__getitem__(key)[-1] + return super().__getitem__(key)[-1] def getlast(self, key: str) -> Any: - return super(_OMD, self).__getitem__(key)[-1] + return super().__getitem__(key)[-1] def setlast(self, key: str, value: Any) -> None: if key not in self: - super(_OMD, self).__setitem__(key, [value]) + super().__setitem__(key, [value]) return - prior = super(_OMD, self).__getitem__(key) + prior = super().__getitem__(key) prior[-1] = value def get(self, key: str, default: Union[_T, None] = None) -> Union[_T, None]: - return super(_OMD, self).get(key, [default])[-1] + return super().get(key, [default])[-1] def getall(self, key: str) -> List[_T]: - return super(_OMD, self).__getitem__(key) + return super().__getitem__(key) def items(self) -> List[Tuple[str, _T]]: # type: ignore[override] """List of (key, last value for key).""" @@ -248,9 +245,9 @@ class _OMD(OrderedDict_OMD): def get_config_path(config_level: Lit_config_levels) -> str: - # we do not support an absolute path of the gitconfig on windows , - # use the global config instead - if is_win and config_level == "system": + # We do not support an absolute path of the gitconfig on Windows. + # Use the global config instead. + if sys.platform == "win32" and config_level == "system": config_level = "global" if config_level == "system": @@ -263,7 +260,8 @@ def get_config_path(config_level: Lit_config_levels) -> str: elif config_level == "repository": raise ValueError("No repo to get repository configuration from. Use Repo._get_config_path") else: - # Should not reach here. Will raise ValueError if does. Static typing will warn missing elifs + # Should not reach here. Will raise ValueError if does. Static typing will warn + # about missing elifs. 
assert_never( # type: ignore[unreachable] config_level, ValueError(f"Invalid configuration level: {config_level!r}"), @@ -271,30 +269,35 @@ def get_config_path(config_level: Lit_config_levels) -> str: class GitConfigParser(cp.RawConfigParser, metaclass=MetaParserBuilder): - """Implements specifics required to read git style configuration files. - This variation behaves much like the git.config command such that the configuration - will be read on demand based on the filepath given during initialization. + This variation behaves much like the :manpage:`git-config(1)` command, such that the + configuration will be read on demand based on the filepath given during + initialization. The changes will automatically be written once the instance goes out of scope, but can be triggered manually as well. - The configuration file will be locked if you intend to change values preventing other - instances to write concurrently. + The configuration file will be locked if you intend to change values preventing + other instances to write concurrently. :note: The config is case-sensitive even when queried, hence section and option names must match perfectly. - If used as a context manager, will release the locked file.""" + + :note: + If used as a context manager, this will release the locked file. + """ # { Configuration - # The lock type determines the type of lock to use in new configuration readers. - # They must be compatible to the LockFile interface. - # A suitable alternative would be the BlockingLockFile t_lock = LockFile - re_comment = re.compile(r"^\s*[#;]") + """The lock type determines the type of lock to use in new configuration readers. + + They must be compatible to the :class:`~git.util.LockFile` interface. + A suitable alternative would be the :class:`~git.util.BlockingLockFile`. 
+ """ + re_comment = re.compile(r"^\s*[#;]") # } END configuration optvalueonly_source = r"\s*(?P<option>[^:=\s][^:=]*)" @@ -305,8 +308,9 @@ class GitConfigParser(cp.RawConfigParser, metaclass=MetaParserBuilder): del optvalueonly_source - # list of RawConfigParser methods able to change the instance _mutating_methods_ = ("add_section", "remove_section", "remove_option", "set") + """Names of :class:`~configparser.RawConfigParser` methods able to change the + instance.""" def __init__( self, @@ -316,30 +320,37 @@ class GitConfigParser(cp.RawConfigParser, metaclass=MetaParserBuilder): config_level: Union[Lit_config_levels, None] = None, repo: Union["Repo", None] = None, ) -> None: - """Initialize a configuration reader to read the given file_or_files and to - possibly allow changes to it by setting read_only False + """Initialize a configuration reader to read the given `file_or_files` and to + possibly allow changes to it by setting `read_only` False. :param file_or_files: - A single file path or file objects or multiple of these + A file path or file object, or a sequence of possibly more than one of them. :param read_only: - If True, the ConfigParser may only read the data , but not change it. - If False, only a single file path or file object may be given. We will write back the changes - when they happen, or when the ConfigParser is released. This will not happen if other - configuration files have been included - :param merge_includes: if True, we will read files mentioned in [include] sections and merge their - contents into ours. This makes it impossible to write back an individual configuration file. - Thus, if you want to modify a single configuration file, turn this off to leave the original - dataset unaltered when reading it. - :param repo: Reference to repository to use if [includeIf] sections are found in configuration files. - + If ``True``, the ConfigParser may only read the data, but not change it. 
+ If ``False``, only a single file path or file object may be given. We will + write back the changes when they happen, or when the ConfigParser is + released. This will not happen if other configuration files have been + included. + + :param merge_includes: + If ``True``, we will read files mentioned in ``[include]`` sections and + merge their contents into ours. This makes it impossible to write back an + individual configuration file. Thus, if you want to modify a single + configuration file, turn this off to leave the original dataset unaltered + when reading it. + + :param repo: + Reference to repository to use if ``[includeIf]`` sections are found in + configuration files. """ cp.RawConfigParser.__init__(self, dict_type=_OMD) - self._dict: Callable[..., _OMD] # type: ignore # mypy/typeshed bug? + self._dict: Callable[..., _OMD] self._defaults: _OMD - self._sections: _OMD # type: ignore # mypy/typeshed bug? + self._sections: _OMD - # Used in python 3, needs to stay in sync with sections for underlying implementation to work + # Used in Python 3. Needs to stay in sync with sections for underlying + # implementation to work. if not hasattr(self, "_proxies"): self._proxies = self._dict() @@ -377,7 +388,7 @@ class GitConfigParser(cp.RawConfigParser, metaclass=MetaParserBuilder): file_or_files = self._file_or_files.name # END get filename from handle/stream - # initialize lock base - we want to write + # Initialize lock base - we want to write. self._lock = self.t_lock(file_or_files) # END lock check @@ -385,8 +396,8 @@ class GitConfigParser(cp.RawConfigParser, metaclass=MetaParserBuilder): # END read-only check def __del__(self) -> None: - """Write pending changes if required and release locks""" - # NOTE: only consistent in PY2 + """Write pending changes if required and release locks.""" + # NOTE: Only consistent in Python 2. 
self.release() def __enter__(self) -> "GitConfigParser": @@ -397,71 +408,72 @@ class GitConfigParser(cp.RawConfigParser, metaclass=MetaParserBuilder): self.release() def release(self) -> None: - """Flush changes and release the configuration write lock. This instance must not be used anymore afterwards. - In Python 3, it's required to explicitly release locks and flush changes, as __del__ is not called - deterministically anymore.""" - # checking for the lock here makes sure we do not raise during write() - # in case an invalid parser was created who could not get a lock + """Flush changes and release the configuration write lock. This instance must + not be used anymore afterwards. + + In Python 3, it's required to explicitly release locks and flush changes, as + ``__del__`` is not called deterministically anymore. + """ + # Checking for the lock here makes sure we do not raise during write() + # in case an invalid parser was created who could not get a lock. if self.read_only or (self._lock and not self._lock._has_lock()): return try: self.write() except IOError: - log.error("Exception during destruction of GitConfigParser", exc_info=True) + _logger.error("Exception during destruction of GitConfigParser", exc_info=True) except ReferenceError: - # This happens in PY3 ... and usually means that some state cannot be - # written as the sections dict cannot be iterated - # Usually when shutting down the interpreter, don't know how to fix this + # This happens in Python 3... and usually means that some state cannot be + # written as the sections dict cannot be iterated. This usually happens when + # the interpreter is shutting down. Can it be fixed? 
pass finally: if self._lock is not None: self._lock._release_lock() def optionxform(self, optionstr: str) -> str: - """Do not transform options in any way when writing""" + """Do not transform options in any way when writing.""" return optionstr def _read(self, fp: Union[BufferedReader, IO[bytes]], fpname: str) -> None: - """A direct copy of the py2.4 version of the super class's _read method - to assure it uses ordered dicts. Had to change one line to make it work. - - Future versions have this fixed, but in fact its quite embarrassing for the - guys not to have done it right in the first place ! - - Removed big comments to make it more compact. + """Originally a direct copy of the Python 2.4 version of + :meth:`RawConfigParser._read <configparser.RawConfigParser._read>`, to ensure it + uses ordered dicts. - Made sure it ignores initial whitespace as git uses tabs""" - cursect = None # None, or a dictionary + The ordering bug was fixed in Python 2.4, and dict itself keeps ordering since + Python 3.7. This has some other changes, especially that it ignores initial + whitespace, since git uses tabs. (Big comments are removed to be more compact.) + """ + cursect = None # None, or a dictionary. optname = None lineno = 0 is_multi_line = False - e = None # None, or an exception + e = None # None, or an exception. def string_decode(v: str) -> str: - if v[-1] == "\\": + if v and v.endswith("\\"): v = v[:-1] - # end cut trailing escapes to prevent decode error + # END cut trailing escapes to prevent decode error return v.encode(defenc).decode("unicode_escape") - # end - # end + # END string_decode while True: - # we assume to read binary ! + # We assume to read binary! line = fp.readline().decode(defenc) if not line: break lineno = lineno + 1 - # comment or blank line? + # Comment or blank line? if line.strip() == "" or self.re_comment.match(line): continue if line.split(None, 1)[0].lower() == "rem" and line[0] in "rR": - # no leading whitespace + # No leading whitespace. 
continue - # is it a section header? + # Is it a section header? mo = self.SECTCRE.match(line.strip()) if not is_multi_line and mo: sectname: str = mo.group("header").strip() @@ -473,16 +485,16 @@ class GitConfigParser(cp.RawConfigParser, metaclass=MetaParserBuilder): cursect = self._dict((("__name__", sectname),)) self._sections[sectname] = cursect self._proxies[sectname] = None - # So sections can't start with a continuation line + # So sections can't start with a continuation line. optname = None - # no section header in the file? + # No section header in the file? elif cursect is None: raise cp.MissingSectionHeaderError(fpname, lineno, line) - # an option line? + # An option line? elif not is_multi_line: mo = self.OPTCRE.match(line) if mo: - # We might just have handled the last line, which could contain a quotation we want to remove + # We might just have handled the last line, which could contain a quotation we want to remove. optname, vi, optval = mo.group("option", "vi", "value") if vi in ("=", ":") and ";" in optval and not optval.strip().startswith('"'): pos = optval.find(";") @@ -491,16 +503,16 @@ class GitConfigParser(cp.RawConfigParser, metaclass=MetaParserBuilder): optval = optval.strip() if optval == '""': optval = "" - # end handle empty string + # END handle empty string optname = self.optionxform(optname.rstrip()) if len(optval) > 1 and optval[0] == '"' and optval[-1] != '"': is_multi_line = True optval = string_decode(optval[1:]) - # end handle multi-line - # preserves multiple values for duplicate optnames + # END handle multi-line + # Preserves multiple values for duplicate optnames. cursect.add(optname, optval) else: - # check if it's an option with no value - it's just ignored by git + # Check if it's an option with no value - it's just ignored by git. 
if not self.OPTVALUEONLY.match(line): if not e: e = cp.ParsingError(fpname) @@ -511,13 +523,13 @@ class GitConfigParser(cp.RawConfigParser, metaclass=MetaParserBuilder): if line.endswith('"'): is_multi_line = False line = line[:-1] - # end handle quotations + # END handle quotations optval = cursect.getlast(optname) cursect.setlast(optname, optval + string_decode(line)) # END parse section or option # END while reading - # if any parsing errors occurred, raise an exception + # If any parsing errors occurred, raise an exception. if e: raise e @@ -525,8 +537,10 @@ class GitConfigParser(cp.RawConfigParser, metaclass=MetaParserBuilder): return self._merge_includes and len(self._included_paths()) def _included_paths(self) -> List[Tuple[str, str]]: - """Return List all paths that must be included to configuration - as Tuples of (option, value). + """List all paths that must be included to configuration. + + :return: + The list of paths, where each path is a tuple of (option, value). """ paths = [] @@ -573,25 +587,28 @@ class GitConfigParser(cp.RawConfigParser, metaclass=MetaParserBuilder): return paths def read(self) -> None: # type: ignore[override] - """Reads the data stored in the files we have been initialized with. It will - ignore files that cannot be read, possibly leaving an empty configuration + """Read the data stored in the files we have been initialized with. - :return: Nothing - :raise IOError: if a file cannot be handled""" + This will ignore files that cannot be read, possibly leaving an empty + configuration. + + :raise IOError: + If a file cannot be handled. + """ if self._is_initialized: - return None + return self._is_initialized = True files_to_read: List[Union[PathLike, IO]] = [""] if isinstance(self._file_or_files, (str, os.PathLike)): - # for str or Path, as str is a type of Sequence + # For str or Path, as str is a type of Sequence. 
files_to_read = [self._file_or_files] elif not isinstance(self._file_or_files, (tuple, list, Sequence)): - # could merge with above isinstance once runtime type known + # Could merge with above isinstance once runtime type known. files_to_read = [self._file_or_files] - else: # for lists or tuples + else: # For lists or tuples. files_to_read = list(self._file_or_files) - # end assure we have a copy of the paths to handle + # END ensure we have a copy of the paths to handle seen = set(files_to_read) num_read_include_files = 0 @@ -600,11 +617,12 @@ class GitConfigParser(cp.RawConfigParser, metaclass=MetaParserBuilder): file_ok = False if hasattr(file_path, "seek"): - # must be a file objectfile-object - file_path = cast(IO[bytes], file_path) # replace with assert to narrow type, once sure + # Must be a file-object. + # TODO: Replace cast with assert to narrow type, once sure. + file_path = cast(IO[bytes], file_path) self._read(file_path, file_path.name) else: - # assume a path if it is not a file-object + # Assume a path if it is not a file-object. file_path = cast(PathLike, file_path) try: with open(file_path, "rb") as fp: @@ -613,8 +631,8 @@ class GitConfigParser(cp.RawConfigParser, metaclass=MetaParserBuilder): except IOError: continue - # Read includes and append those that we didn't handle yet - # We expect all paths to be normalized and absolute (and will assure that is the case) + # Read includes and append those that we didn't handle yet. We expect all + # paths to be normalized and absolute (and will ensure that is the case). 
if self._has_includes(): for _, include_path in self._included_paths(): if include_path.startswith("~"): @@ -622,36 +640,35 @@ class GitConfigParser(cp.RawConfigParser, metaclass=MetaParserBuilder): if not osp.isabs(include_path): if not file_ok: continue - # end ignore relative paths if we don't know the configuration file path + # END ignore relative paths if we don't know the configuration file path file_path = cast(PathLike, file_path) assert osp.isabs(file_path), "Need absolute paths to be sure our cycle checks will work" include_path = osp.join(osp.dirname(file_path), include_path) - # end make include path absolute + # END make include path absolute include_path = osp.normpath(include_path) if include_path in seen or not os.access(include_path, os.R_OK): continue seen.add(include_path) - # insert included file to the top to be considered first + # Insert included file to the top to be considered first. files_to_read.insert(0, include_path) num_read_include_files += 1 - # each include path in configuration file - # end handle includes + # END each include path in configuration file + # END handle includes # END for each file object to read - # If there was no file included, we can safely write back (potentially) the configuration file - # without altering it's meaning + # If there was no file included, we can safely write back (potentially) the + # configuration file without altering its meaning. if num_read_include_files == 0: self._merge_includes = False - # end def _write(self, fp: IO) -> None: """Write an .ini-format representation of the configuration state in - git compatible format""" + git compatible format.""" def write_section(name: str, section_dict: _OMD) -> None: fp.write(("[%s]\n" % name).encode(defenc)) - values: Sequence[str] # runtime only gets str in tests, but should be whatever _OMD stores + values: Sequence[str] # Runtime only gets str in tests, but should be whatever _OMD stores. 
v: str for key, values in section_dict.items_all(): if key == "__name__": @@ -672,7 +689,7 @@ class GitConfigParser(cp.RawConfigParser, metaclass=MetaParserBuilder): def items(self, section_name: str) -> List[Tuple[str, str]]: # type: ignore[override] """:return: list((option, value), ...) pairs of all items in the given section""" - return [(k, v) for k, v in super(GitConfigParser, self).items(section_name) if k != "__name__"] + return [(k, v) for k, v in super().items(section_name) if k != "__name__"] def items_all(self, section_name: str) -> List[Tuple[str, List[str]]]: """:return: list((option, [values...]), ...) pairs of all items in the given section""" @@ -692,34 +709,36 @@ class GitConfigParser(cp.RawConfigParser, metaclass=MetaParserBuilder): @needs_values def write(self) -> None: - """Write changes to our file, if there are changes at all + """Write changes to our file, if there are changes at all. - :raise IOError: if this is a read-only writer instance or if we could not obtain - a file lock""" + :raise IOError: + If this is a read-only writer instance or if we could not obtain a file + lock. + """ self._assure_writable("write") if not self._dirty: - return None + return if isinstance(self._file_or_files, (list, tuple)): raise AssertionError( "Cannot write back if there is not exactly a single file to write to, have %i files" % len(self._file_or_files) ) - # end assert multiple files + # END assert multiple files if self._has_includes(): - log.debug( + _logger.debug( "Skipping write-back of configuration file as include files were merged in." + "Set merge_includes=False to prevent this." ) - return None - # end + return + # END stop if we have include files fp = self._file_or_files - # we have a physical file on disk, so get a lock - is_file_lock = isinstance(fp, (str, os.PathLike, IOBase)) # can't use Pathlike until 3.5 dropped - if is_file_lock and self._lock is not None: # else raise Error? + # We have a physical file on disk, so get a lock. 
+ is_file_lock = isinstance(fp, (str, os.PathLike, IOBase)) # TODO: Use PathLike (having dropped 3.5). + if is_file_lock and self._lock is not None: # Else raise error? self._lock._obtain_lock() if not hasattr(fp, "seek"): @@ -729,7 +748,7 @@ class GitConfigParser(cp.RawConfigParser, metaclass=MetaParserBuilder): else: fp = cast("BytesIO", fp) fp.seek(0) - # make sure we do not overwrite into an existing file + # Make sure we do not overwrite into an existing file. if hasattr(fp, "truncate"): fp.truncate() self._write(fp) @@ -739,33 +758,37 @@ class GitConfigParser(cp.RawConfigParser, metaclass=MetaParserBuilder): raise IOError("Cannot execute non-constant method %s.%s" % (self, method_name)) def add_section(self, section: str) -> None: - """Assures added options will stay in order""" - return super(GitConfigParser, self).add_section(section) + """Assures added options will stay in order.""" + return super().add_section(section) @property def read_only(self) -> bool: - """:return: True if this instance may change the configuration file""" + """:return: ``True`` if this instance may change the configuration file""" return self._read_only + # FIXME: Figure out if default or return type can really include bool. def get_value( self, section: str, option: str, default: Union[int, float, str, bool, None] = None, ) -> Union[int, float, str, bool]: - # can default or return type include bool? """Get an option's value. - If multiple values are specified for this option in the section, the - last one specified is returned. + If multiple values are specified for this option in the section, the last one + specified is returned. :param default: - If not None, the given default value will be returned in case - the option did not exist - :return: a properly typed value, either int, float or string + If not ``None``, the given default value will be returned in case the option + did not exist. 
- :raise TypeError: in case the value could not be understood - Otherwise the exceptions known to the ConfigParser will be raised.""" + :return: + A properly typed value, either int, float or string + + :raise TypeError: + In case the value could not be understood. + Otherwise the exceptions known to the ConfigParser will be raised. + """ try: valuestr = self.get(section, option) except Exception: @@ -787,12 +810,16 @@ class GitConfigParser(cp.RawConfigParser, metaclass=MetaParserBuilder): returned. :param default: - If not None, a list containing the given default value will be - returned in case the option did not exist - :return: a list of properly typed values, either int, float or string + If not ``None``, a list containing the given default value will be returned + in case the option did not exist. - :raise TypeError: in case the value could not be understood - Otherwise the exceptions known to the ConfigParser will be raised.""" + :return: + A list of properly typed values, either int, float or string + + :raise TypeError: + In case the value could not be understood. + Otherwise the exceptions known to the ConfigParser will be raised. + """ try: self.sections() lst = self._sections[section].getall(option) @@ -816,7 +843,7 @@ class GitConfigParser(cp.RawConfigParser, metaclass=MetaParserBuilder): continue # END for each numeric type - # try boolean values as git uses them + # Try boolean values as git uses them. vl = valuestr.lower() if vl == "false": return False @@ -839,16 +866,23 @@ class GitConfigParser(cp.RawConfigParser, metaclass=MetaParserBuilder): @needs_values @set_dirty_and_flush_changes def set_value(self, section: str, option: str, value: Union[str, bytes, int, float, bool]) -> "GitConfigParser": - """Sets the given option in section to the given value. - It will create the section if required, and will not throw as opposed to the default - ConfigParser 'set' method. + """Set the given option in section to the given value. 
- :param section: Name of the section in which the option resides or should reside - :param option: Name of the options whose value to set + This will create the section if required, and will not throw as opposed to the + default ConfigParser ``set`` method. - :param value: Value to set the option to. It must be a string or convertible - to a string - :return: this instance""" + :param section: + Name of the section in which the option resides or should reside. + + :param option: + Name of the options whose value to set. + + :param value: + Value to set the option to. It must be a string or convertible to a string. + + :return: + This instance + """ if not self.has_section(section): self.add_section(section) self.set(section, option, self._value_to_string(value)) @@ -857,39 +891,54 @@ class GitConfigParser(cp.RawConfigParser, metaclass=MetaParserBuilder): @needs_values @set_dirty_and_flush_changes def add_value(self, section: str, option: str, value: Union[str, bytes, int, float, bool]) -> "GitConfigParser": - """Adds a value for the given option in section. - It will create the section if required, and will not throw as opposed to the default - ConfigParser 'set' method. The value becomes the new value of the option as returned - by 'get_value', and appends to the list of values returned by 'get_values`'. + """Add a value for the given option in section. - :param section: Name of the section in which the option resides or should reside - :param option: Name of the option + This will create the section if required, and will not throw as opposed to the + default ConfigParser ``set`` method. The value becomes the new value of the + option as returned by :meth:`get_value`, and appends to the list of values + returned by :meth:`get_values`. - :param value: Value to add to option. It must be a string or convertible - to a string - :return: this instance""" + :param section: + Name of the section in which the option resides or should reside. 
+ + :param option: + Name of the option. + + :param value: + Value to add to option. It must be a string or convertible to a string. + + :return: + This instance + """ if not self.has_section(section): self.add_section(section) self._sections[section].add(option, self._value_to_string(value)) return self def rename_section(self, section: str, new_name: str) -> "GitConfigParser": - """rename the given section to new_name - :raise ValueError: if section doesn't exit - :raise ValueError: if a section with new_name does already exist - :return: this instance + """Rename the given section to `new_name`. + + :raise ValueError: + If: + + * `section` doesn't exist. + * A section with `new_name` does already exist. + + :return: + This instance """ if not self.has_section(section): raise ValueError("Source section '%s' doesn't exist" % section) if self.has_section(new_name): raise ValueError("Destination section '%s' already exists" % new_name) - super(GitConfigParser, self).add_section(new_name) + super().add_section(new_name) new_section = self._sections[new_name] for k, vs in self.items_all(section): new_section.setall(k, vs) - # end for each value to copy + # END for each value to copy - # This call writes back the changes, which is why we don't have the respective decorator + # This call writes back the changes, which is why we don't have the respective + # decorator. 
self.remove_section(section) return self diff --git a/git/db.py b/git/db.py index bff4334..cacd030 100644 --- a/git/db.py +++ b/git/db.py @@ -1,58 +1,66 @@ -"""Module with our own gitdb implementation - it uses the git command""" -from git.util import bin_to_hex, hex_to_bin -from gitdb.base import OInfo, OStream -from gitdb.db import GitDB # @UnusedImport -from gitdb.db import LooseObjectDB +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ + +"""Module with our own gitdb implementation - it uses the git command.""" + +__all__ = ["GitCmdObjectDB", "GitDB"] +from gitdb.base import OInfo, OStream +from gitdb.db import GitDB, LooseObjectDB from gitdb.exc import BadObject + +from git.util import bin_to_hex, hex_to_bin from git.exc import GitCommandError # typing------------------------------------------------- from typing import TYPE_CHECKING + from git.types import PathLike if TYPE_CHECKING: from git.cmd import Git - # -------------------------------------------------------- -__all__ = ("GitCmdObjectDB", "GitDB") - class GitCmdObjectDB(LooseObjectDB): - """A database representing the default git object store, which includes loose - objects, pack files and an alternates file + objects, pack files and an alternates file. It will create objects only in the loose object database. 
- :note: for now, we use the git command to do all the lookup, just until he - have packs and the other implementations """ def __init__(self, root_path: PathLike, git: "Git") -> None: - """Initialize this instance with the root and a git command""" - super(GitCmdObjectDB, self).__init__(root_path) + """Initialize this instance with the root and a git command.""" + super().__init__(root_path) self._git = git def info(self, binsha: bytes) -> OInfo: + """Get a git object header (using git itself).""" hexsha, typename, size = self._git.get_object_header(bin_to_hex(binsha)) return OInfo(hex_to_bin(hexsha), typename, size) def stream(self, binsha: bytes) -> OStream: - """For now, all lookup is done by git itself""" + """Get git object data as a stream supporting ``read()`` (using git itself).""" hexsha, typename, size, stream = self._git.stream_object_data(bin_to_hex(binsha)) return OStream(hex_to_bin(hexsha), typename, size, stream) # { Interface def partial_to_complete_sha_hex(self, partial_hexsha: str) -> bytes: - """:return: Full binary 20 byte sha from the given partial hexsha - :raise AmbiguousObjectName: - :raise BadObject: - :note: currently we only raise BadObject as git does not communicate - AmbiguousObjects separately""" + """ + :return: + Full binary 20 byte sha from the given partial hexsha + + :raise gitdb.exc.AmbiguousObjectName: + + :raise gitdb.exc.BadObject: + + :note: + Currently we only raise :exc:`~gitdb.exc.BadObject` as git does not + communicate ambiguous objects separately. 
+ """ try: hexsha, _typename, _size = self._git.get_object_header(partial_hexsha) return hex_to_bin(hexsha) diff --git a/git/diff.py b/git/diff.py index 3e3de7b..9c6ae59 100644 --- a/git/diff.py +++ b/git/diff.py @@ -1,18 +1,20 @@ -# diff.py # Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors # -# This module is part of GitPython and is released under -# the BSD License: https://opensource.org/license/bsd-3-clause/ +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ +__all__ = ["DiffConstants", "NULL_TREE", "INDEX", "Diffable", "DiffIndex", "Diff"] + +import enum import re +import warnings + from git.cmd import handle_process_output from git.compat import defenc +from git.objects.blob import Blob +from git.objects.util import mode_str_to_int from git.util import finalize_process, hex_to_bin -from .objects.blob import Blob -from .objects.util import mode_str_to_int - - # typing ------------------------------------------------------------------ from typing import ( @@ -22,38 +24,75 @@ from typing import ( Match, Optional, Tuple, - Type, + TYPE_CHECKING, TypeVar, Union, - TYPE_CHECKING, cast, ) -from git.types import PathLike, Literal +from git.types import Literal, PathLike if TYPE_CHECKING: - from .objects.tree import Tree - from .objects import Commit - from git.repo.base import Repo - from git.objects.base import IndexObject from subprocess import Popen - from git import Git + + from git.cmd import Git + from git.objects.base import IndexObject + from git.objects.commit import Commit + from git.objects.tree import Tree + from git.repo.base import Repo Lit_change_type = Literal["A", "D", "C", "M", "R", "T", "U"] +# ------------------------------------------------------------------------ -# def is_change_type(inp: str) -> TypeGuard[Lit_change_type]: -# # return True -# return inp in ['A', 'D', 'C', 'M', 'R', 'T', 'U'] -# 
------------------------------------------------------------------------ +@enum.unique +class DiffConstants(enum.Enum): + """Special objects for :meth:`Diffable.diff`. + + See the :meth:`Diffable.diff` method's ``other`` parameter, which accepts various + values including these. + + :note: + These constants are also available as attributes of the :mod:`git.diff` module, + the :class:`Diffable` class and its subclasses and instances, and the top-level + :mod:`git` module. + """ + + NULL_TREE = enum.auto() + """Stand-in indicating you want to compare against the empty tree in diffs. + Also accessible as :const:`git.NULL_TREE`, :const:`git.diff.NULL_TREE`, and + :const:`Diffable.NULL_TREE`. + """ -__all__ = ("Diffable", "DiffIndex", "Diff", "NULL_TREE") + INDEX = enum.auto() + """Stand-in indicating you want to diff against the index. -# Special object to compare against the empty tree in diffs -NULL_TREE = object() + Also accessible as :const:`git.INDEX`, :const:`git.diff.INDEX`, and + :const:`Diffable.INDEX`, as well as :const:`Diffable.Index`. The latter has been + kept for backward compatibility and made an alias of this, so it may still be used. + """ -_octal_byte_re = re.compile(b"\\\\([0-9]{3})") + +NULL_TREE: Literal[DiffConstants.NULL_TREE] = DiffConstants.NULL_TREE +"""Stand-in indicating you want to compare against the empty tree in diffs. + +See :meth:`Diffable.diff`, which accepts this as a value of its ``other`` parameter. + +This is an alias of :const:`DiffConstants.NULL_TREE`, which may also be accessed as +:const:`git.NULL_TREE` and :const:`Diffable.NULL_TREE`. +""" + +INDEX: Literal[DiffConstants.INDEX] = DiffConstants.INDEX +"""Stand-in indicating you want to diff against the index. + +See :meth:`Diffable.diff`, which accepts this as a value of its ``other`` parameter. + +This is an alias of :const:`DiffConstants.INDEX`, which may also be accessed as +:const:`git.INDEX` and :const:`Diffable.INDEX`, as well as :const:`Diffable.Index`. 
+""" + +_octal_byte_re = re.compile(rb"\\([0-9]{3})") def _octal_repl(matchobj: Match) -> bytes: @@ -79,106 +118,152 @@ def decode_path(path: bytes, has_ab_prefix: bool = True) -> Optional[bytes]: return path -class Diffable(object): - - """Common interface for all object that can be diffed against another object of compatible type. +class Diffable: + """Common interface for all objects that can be diffed against another object of + compatible type. :note: - Subclasses require a repo member as it is the case for Object instances, for practical - reasons we do not derive from Object.""" + Subclasses require a :attr:`repo` member, as it is the case for + :class:`~git.objects.base.Object` instances. For practical reasons we do not + derive from :class:`~git.objects.base.Object`. + """ __slots__ = () - # standin indicating you want to diff against the index - class Index(object): - pass + repo: "Repo" + """Repository to operate on. Must be provided by subclass or sibling class.""" + + NULL_TREE = NULL_TREE + """Stand-in indicating you want to compare against the empty tree in diffs. + + See the :meth:`diff` method, which accepts this as a value of its ``other`` + parameter. + + This is the same as :const:`DiffConstants.NULL_TREE`, and may also be accessed as + :const:`git.NULL_TREE` and :const:`git.diff.NULL_TREE`. + """ + + INDEX = INDEX + """Stand-in indicating you want to diff against the index. + + See the :meth:`diff` method, which accepts this as a value of its ``other`` + parameter. + + This is the same as :const:`DiffConstants.INDEX`, and may also be accessed as + :const:`git.INDEX` and :const:`git.diff.INDEX`, as well as :class:`Diffable.INDEX`, + which is kept for backward compatibility (it is now defined an alias of this). + """ + + Index = INDEX + """Stand-in indicating you want to diff against the index + (same as :const:`~Diffable.INDEX`). + + This is an alias of :const:`~Diffable.INDEX`, for backward compatibility. 
See + :const:`~Diffable.INDEX` and :meth:`diff` for details. + + :note: + Although always meant for use as an opaque constant, this was formerly defined + as a class. Its usage is unchanged, but static type annotations that attempt + to permit only this object must be changed to avoid new mypy errors. This was + previously not possible to do, though ``Type[Diffable.Index]`` approximated it. + It is now possible to do precisely, using ``Literal[DiffConstants.INDEX]``. + """ def _process_diff_args( - self, args: List[Union[str, "Diffable", Type["Diffable.Index"], object]] - ) -> List[Union[str, "Diffable", Type["Diffable.Index"], object]]: + self, + args: List[Union[PathLike, "Diffable"]], + ) -> List[Union[PathLike, "Diffable"]]: """ :return: - possibly altered version of the given args list. - Method is called right before git command execution. - Subclasses can use it to alter the behaviour of the superclass""" + Possibly altered version of the given args list. + This method is called right before git command execution. + Subclasses can use it to alter the behaviour of the superclass. + """ return args def diff( self, - other: Union[Type["Index"], "Tree", "Commit", None, str, object] = Index, + other: Union[DiffConstants, "Tree", "Commit", str, None] = INDEX, paths: Union[PathLike, List[PathLike], Tuple[PathLike, ...], None] = None, create_patch: bool = False, **kwargs: Any, - ) -> "DiffIndex": - """Creates diffs between two items being trees, trees and index or an - index and the working tree. It will detect renames automatically. + ) -> "DiffIndex[Diff]": + """Create diffs between two items being trees, trees and index or an index and + the working tree. Detects renames automatically. :param other: - Is the item to compare us with. - If None, we will be compared to the working tree. - If Treeish, it will be compared against the respective tree - If Index ( type ), it will be compared against the index. - If git.NULL_TREE, it will compare against the empty tree. 
- It defaults to Index to assure the method will not by-default fail - on bare repositories. + This the item to compare us with. + + * If ``None``, we will be compared to the working tree. + + * If a :class:`~git.types.Tree_ish` or string, it will be compared against + the respective tree. + + * If :const:`INDEX`, it will be compared against the index. + + * If :const:`NULL_TREE`, it will compare against the empty tree. + + This parameter defaults to :const:`INDEX` (rather than ``None``) so that the + method will not by default fail on bare repositories. :param paths: - is a list of paths or a single path to limit the diff to. - It will only include at least one of the given path or paths. + This a list of paths or a single path to limit the diff to. It will only + include at least one of the given path or paths. :param create_patch: - If True, the returned Diff contains a detailed patch that if applied - makes the self to other. Patches are somewhat costly as blobs have to be read - and diffed. + If ``True``, the returned :class:`Diff` contains a detailed patch that if + applied makes the self to other. Patches are somewhat costly as blobs have + to be read and diffed. :param kwargs: - Additional arguments passed to git-diff, such as - R=True to swap both sides of the diff. + Additional arguments passed to :manpage:`git-diff(1)`, such as ``R=True`` to + swap both sides of the diff. - :return: git.DiffIndex + :return: + A :class:`DiffIndex` representing the computed diff. 
:note: - On a bare repository, 'other' needs to be provided as Index or as - as Tree/Commit, or a git command error will occur""" - args: List[Union[PathLike, Diffable, Type["Diffable.Index"], object]] = [] - args.append("--abbrev=40") # we need full shas - args.append("--full-index") # get full index paths, not only filenames + On a bare repository, `other` needs to be provided as :const:`INDEX`, or as + an instance of :class:`~git.objects.tree.Tree` or + :class:`~git.objects.commit.Commit`, or a git command error will occur. + """ + args: List[Union[PathLike, Diffable]] = [] + args.append("--abbrev=40") # We need full shas. + args.append("--full-index") # Get full index paths, not only filenames. - # remove default '-M' arg (check for renames) if user is overriding it + # Remove default '-M' arg (check for renames) if user is overriding it. if not any(x in kwargs for x in ("find_renames", "no_renames", "M")): args.append("-M") if create_patch: args.append("-p") + args.append("--no-ext-diff") else: args.append("--raw") args.append("-z") - # in any way, assure we don't see colored output, - # fixes https://github.com/gitpython-developers/GitPython/issues/172 + # Ensure we never see colored output. + # Fixes: https://github.com/gitpython-developers/GitPython/issues/172 args.append("--no-color") if paths is not None and not isinstance(paths, (tuple, list)): paths = [paths] - if hasattr(self, "Has_Repo"): - self.repo: "Repo" = self.repo - diff_cmd = self.repo.git.diff - if other is self.Index: + if other is INDEX: args.insert(0, "--cached") elif other is NULL_TREE: - args.insert(0, "-r") # recursive diff-tree + args.insert(0, "-r") # Recursive diff-tree. args.insert(0, "--root") diff_cmd = self.repo.git.diff_tree elif other is not None: - args.insert(0, "-r") # recursive diff-tree + args.insert(0, "-r") # Recursive diff-tree. 
args.insert(0, other) diff_cmd = self.repo.git.diff_tree args.insert(0, self) - # paths is list here or None + # paths is a list or tuple here, or None. if paths: args.append("--") args.extend(paths) @@ -198,27 +283,29 @@ T_Diff = TypeVar("T_Diff", bound="Diff") class DiffIndex(List[T_Diff]): + R"""An index for diffs, allowing a list of :class:`Diff`\s to be queried by the diff + properties. - """Implements an Index for diffs, allowing a list of Diffs to be queried by - the diff properties. + The class improves the diff handling convenience. + """ - The class improves the diff handling convenience""" - - # change type invariant identifying possible ways a blob can have changed - # A = Added - # D = Deleted - # R = Renamed - # M = Modified - # T = Changed in the type change_type = ("A", "C", "D", "R", "M", "T") + """Change type invariant identifying possible ways a blob can have changed: + + * ``A`` = Added + * ``D`` = Deleted + * ``R`` = Renamed + * ``M`` = Modified + * ``T`` = Changed in the type + """ def iter_change_type(self, change_type: Lit_change_type) -> Iterator[T_Diff]: """ :return: - iterator yielding Diff instances that match the given change_type + Iterator yielding :class:`Diff` instances that match the given `change_type` :param change_type: - Member of DiffIndex.change_type, namely: + Member of :attr:`DiffIndex.change_type`, namely: * 'A' for added paths * 'D' for deleted paths @@ -238,46 +325,47 @@ class DiffIndex(List[T_Diff]): yield diffidx elif change_type == "C" and diffidx.copied_file: yield diffidx - elif change_type == "R" and diffidx.renamed: + elif change_type == "R" and diffidx.renamed_file: yield diffidx elif change_type == "M" and diffidx.a_blob and diffidx.b_blob and diffidx.a_blob != diffidx.b_blob: yield diffidx # END for each diff -class Diff(object): - +class Diff: """A Diff contains diff information between two Trees. 
- It contains two sides a and b of the diff, members are prefixed with - "a" and "b" respectively to inidcate that. + It contains two sides a and b of the diff. Members are prefixed with "a" and "b" + respectively to indicate that. Diffs keep information about the changed blob objects, the file mode, renames, deletions and new files. - There are a few cases where None has to be expected as member variable value: + There are a few cases where ``None`` has to be expected as member variable value: - ``New File``:: + New File:: a_mode is None a_blob is None a_path is None - ``Deleted File``:: + Deleted File:: b_mode is None b_blob is None b_path is None - ``Working Tree Blobs`` + Working Tree Blobs: When comparing to working trees, the working tree blob will have a null hexsha - as a corresponding object does not yet exist. The mode will be null as well. - But the path will be available though. - If it is listed in a diff the working tree version of the file must - be different to the version in the index or tree, and hence has been modified.""" + as a corresponding object does not yet exist. The mode will be null as well. The + path will be available, though. + + If it is listed in a diff, the working tree version of the file must differ from + the version in the index or tree, and hence has been modified. + """ - # precompiled regex + # Precompiled regex. re_header = re.compile( rb""" ^diff[ ]--git @@ -299,7 +387,8 @@ class Diff(object): """, re.VERBOSE | re.MULTILINE, ) - # can be used for comparisons + + # These can be used for comparisons. NULL_HEX_SHA = "0" * 40 NULL_BIN_SHA = b"\0" * 20 @@ -346,8 +435,8 @@ class Diff(object): self.a_mode = mode_str_to_int(a_mode) if a_mode else None self.b_mode = mode_str_to_int(b_mode) if b_mode else None - # Determine whether this diff references a submodule, if it does then - # we need to overwrite "repo" to the corresponding submodule's repo instead + # Determine whether this diff references a submodule. 
If it does then + # we need to overwrite "repo" to the corresponding submodule's repo instead. if repo and a_rawpath: for submodule in repo.submodules: if submodule.path == a_rawpath.decode(defenc, "replace"): @@ -371,7 +460,7 @@ class Diff(object): self.deleted_file: bool = deleted_file self.copied_file: bool = copied_file - # be clear and use None instead of empty strings + # Be clear and use None instead of empty strings. assert raw_rename_from is None or isinstance(raw_rename_from, bytes) assert raw_rename_to is None or isinstance(raw_rename_to, bytes) self.raw_rename_from = raw_rename_from or None @@ -395,15 +484,15 @@ class Diff(object): return hash(tuple(getattr(self, n) for n in self.__slots__)) def __str__(self) -> str: - h: str = "%s" + h = "%s" if self.a_blob: h %= self.a_blob.path elif self.b_blob: h %= self.b_blob.path - msg: str = "" - line = None # temp line - line_length = 0 # line length + msg = "" + line = None + line_length = 0 for b, n in zip((self.a_blob, self.b_blob), ("lhs", "rhs")): if b: line = "\n%s: %o | %s" % (n, b.mode, b.hexsha) @@ -414,7 +503,7 @@ class Diff(object): msg += line # END for each blob - # add headline + # Add headline. h += "\n" + "=" * line_length if self.deleted_file: @@ -433,15 +522,11 @@ class Diff(object): msg += self.diff.decode(defenc) if isinstance(self.diff, bytes) else self.diff except UnicodeDecodeError: msg += "OMITTED BINARY DATA" - # end handle encoding + # END handle encoding msg += "\n---" # END diff info - # Python2 silliness: have to assure we convert our likely to be unicode object to a string with the - # right encoding. Otherwise it tries to convert it using ascii, which may fail ungracefully - res = h + msg - # end - return res + return h + msg @property def a_path(self) -> Optional[str]: @@ -461,14 +546,25 @@ class Diff(object): @property def renamed(self) -> bool: - """:returns: True if the blob of our diff has been renamed - :note: This property is deprecated, please use ``renamed_file`` instead. 
+ """Deprecated, use :attr:`renamed_file` instead. + + :return: + ``True`` if the blob of our diff has been renamed + + :note: + This property is deprecated. + Please use the :attr:`renamed_file` property instead. """ + warnings.warn( + "Diff.renamed is deprecated, use Diff.renamed_file instead", + DeprecationWarning, + stacklevel=2, + ) return self.renamed_file @property def renamed_file(self) -> bool: - """:returns: True if the blob of our diff has been renamed""" + """:return: ``True`` if the blob of our diff has been renamed""" return self.rename_from != self.rename_to @classmethod @@ -485,23 +581,32 @@ class Diff(object): return None @classmethod - def _index_from_patch_format(cls, repo: "Repo", proc: Union["Popen", "Git.AutoInterrupt"]) -> DiffIndex: - """Create a new DiffIndex from the given text which must be in patch format - :param repo: is the repository we are operating on - it is required - :param stream: result of 'git diff' as a stream (supporting file protocol) - :return: git.DiffIndex""" + def _index_from_patch_format(cls, repo: "Repo", proc: Union["Popen", "Git.AutoInterrupt"]) -> DiffIndex["Diff"]: + """Create a new :class:`DiffIndex` from the given process output which must be + in patch format. + + :param repo: + The repository we are operating on. + + :param proc: + :manpage:`git-diff(1)` process to read from + (supports :class:`Git.AutoInterrupt <git.cmd.Git.AutoInterrupt>` wrapper). - ## FIXME: Here SLURPING raw, need to re-phrase header-regexes linewise. + :return: + :class:`DiffIndex` + """ + + # FIXME: Here SLURPING raw, need to re-phrase header-regexes linewise. text_list: List[bytes] = [] handle_process_output(proc, text_list.append, None, finalize_process, decode_streams=False) - # for now, we have to bake the stream + # For now, we have to bake the stream. 
text = b"".join(text_list) index: "DiffIndex" = DiffIndex() previous_header: Union[Match[bytes], None] = None header: Union[Match[bytes], None] = None - a_path, b_path = None, None # for mypy - a_mode, b_mode = None, None # for mypy + a_path, b_path = None, None # For mypy. + a_mode, b_mode = None, None # For mypy. for _header in cls.re_header.finditer(text): ( a_path_fallback, @@ -529,14 +634,14 @@ class Diff(object): a_path = cls._pick_best_path(a_path, rename_from, a_path_fallback) b_path = cls._pick_best_path(b_path, rename_to, b_path_fallback) - # Our only means to find the actual text is to see what has not been matched by our regex, - # and then retro-actively assign it to our index + # Our only means to find the actual text is to see what has not been matched + # by our regex, and then retro-actively assign it to our index. if previous_header is not None: index[-1].diff = text[previous_header.end() : _header.start()] - # end assign actual diff + # END assign actual diff - # Make sure the mode is set if the path is set. Otherwise the resulting blob is invalid - # We just use the one mode we should have parsed + # Make sure the mode is set if the path is set. Otherwise the resulting blob + # is invalid. We just use the one mode we should have parsed. 
a_mode = old_mode or deleted_file_mode or (a_path and (b_mode or new_mode or new_file_mode)) b_mode = b_mode or new_mode or new_file_mode or (b_path and a_mode) index.append( @@ -561,15 +666,15 @@ class Diff(object): previous_header = _header header = _header - # end for each header we parse + # END for each header we parse if index and header: index[-1].diff = text[header.end() :] - # end assign last diff + # END assign last diff return index @staticmethod - def _handle_diff_line(lines_bytes: bytes, repo: "Repo", index: DiffIndex) -> None: + def _handle_diff_line(lines_bytes: bytes, repo: "Repo", index: DiffIndex["Diff"]) -> None: lines = lines_bytes.decode(defenc) # Discard everything before the first colon, and the colon itself. @@ -577,7 +682,7 @@ class Diff(object): for line in lines.split("\x00:"): if not line: - # The line data is empty, skip + # The line data is empty, skip. continue meta, _, path = line.partition("\x00") path = path.rstrip("\x00") @@ -587,11 +692,10 @@ class Diff(object): # Change type can be R100 # R: status letter # 100: score (in case of copy and rename) - # assert is_change_type(_change_type[0]), f"Unexpected value for change_type received: {_change_type[0]}" change_type: Lit_change_type = cast(Lit_change_type, _change_type[0]) score_str = "".join(_change_type[1:]) score = int(score_str) if score_str.isdigit() else None - path = path.strip() + path = path.strip("\n") a_path = path.encode(defenc) b_path = path.encode(defenc) deleted_file = False @@ -600,8 +704,8 @@ class Diff(object): rename_from = None rename_to = None - # NOTE: We cannot conclude from the existence of a blob to change type - # as diffs with the working do not have blobs yet + # NOTE: We cannot conclude from the existence of a blob to change type, + # as diffs with the working do not have blobs yet. 
if change_type == "D": b_blob_id = None # Optional[str] deleted_file = True @@ -619,7 +723,7 @@ class Diff(object): b_path = b_path_str.encode(defenc) rename_from, rename_to = a_path, b_path elif change_type == "T": - # Nothing to do + # Nothing to do. pass # END add/remove handling @@ -643,9 +747,19 @@ class Diff(object): index.append(diff) @classmethod - def _index_from_raw_format(cls, repo: "Repo", proc: "Popen") -> "DiffIndex": - """Create a new DiffIndex from the given stream which must be in raw format. - :return: git.DiffIndex""" + def _index_from_raw_format(cls, repo: "Repo", proc: "Popen") -> "DiffIndex[Diff]": + """Create a new :class:`DiffIndex` from the given process output which must be + in raw format. + + :param repo: + The repository we are operating on. + + :param proc: + Process to read output from. + + :return: + :class:`DiffIndex` + """ # handles # :100644 100644 687099101... 37c5e30c8... M .gitignore diff --git a/git/exc.py b/git/exc.py index 0786a8e..583eee8 100644 --- a/git/exc.py +++ b/git/exc.py @@ -1,18 +1,55 @@ -# exc.py # Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors # -# This module is part of GitPython and is released under -# the BSD License: https://opensource.org/license/bsd-3-clause/ -""" Module containing all exceptions thrown throughout the git package, """ +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ + +"""Exceptions thrown throughout the git package.""" + +__all__ = [ + # Defined in gitdb.exc: + "AmbiguousObjectName", + "BadName", + "BadObject", + "BadObjectType", + "InvalidDBRoot", + "ODBError", + "ParseError", + "UnsupportedOperation", + # Introduced in this module: + "GitError", + "InvalidGitRepositoryError", + "WorkTreeRepositoryUnsupported", + "NoSuchPathError", + "UnsafeProtocolError", + "UnsafeOptionError", + "CommandError", + "GitCommandNotFound", + "GitCommandError", + "CheckoutError", + "CacheError", + 
"UnmergedEntriesError", + "HookExecutionError", + "RepositoryDirtyError", +] + +from gitdb.exc import ( + AmbiguousObjectName, + BadName, + BadObject, + BadObjectType, + InvalidDBRoot, + ODBError, + ParseError, + UnsupportedOperation, +) -from gitdb.exc import BadName # NOQA @UnusedWildImport skipcq: PYL-W0401, PYL-W0614 -from gitdb.exc import * # NOQA @UnusedWildImport skipcq: PYL-W0401, PYL-W0614 from git.compat import safe_decode from git.util import remove_password_if_present # typing ---------------------------------------------------- -from typing import List, Sequence, Tuple, Union, TYPE_CHECKING +from typing import List, Sequence, Tuple, TYPE_CHECKING, Union + from git.types import PathLike if TYPE_CHECKING: @@ -22,7 +59,7 @@ if TYPE_CHECKING: class GitError(Exception): - """Base class for all package exceptions""" + """Base class for all package exceptions.""" class InvalidGitRepositoryError(GitError): @@ -30,7 +67,7 @@ class InvalidGitRepositoryError(GitError): class WorkTreeRepositoryUnsupported(InvalidGitRepositoryError): - """Thrown to indicate we can't handle work tree repositories""" + """Thrown to indicate we can't handle work tree repositories.""" class NoSuchPathError(GitError, OSError): @@ -46,16 +83,20 @@ class UnsafeOptionError(GitError): class CommandError(GitError): - """Base class for exceptions thrown at every stage of `Popen()` execution. + """Base class for exceptions thrown at every stage of :class:`~subprocess.Popen` + execution. :param command: A non-empty list of argv comprising the command-line. """ - #: A unicode print-format with 2 `%s for `<cmdline>` and the rest, - #: e.g. - #: "'%s' failed%s" _msg = "Cmd('%s') failed%s" + """Format string with 2 ``%s`` for ``<cmdline>`` and the rest. + + For example: ``"'%s' failed%s"`` + + Subclasses may override this attribute, provided it is still in this form. 
+ """ def __init__( self, @@ -97,11 +138,11 @@ class CommandError(GitError): class GitCommandNotFound(CommandError): - """Thrown if we cannot find the `git` executable in the PATH or at the path given by - the GIT_PYTHON_GIT_EXECUTABLE environment variable""" + """Thrown if we cannot find the ``git`` executable in the :envvar:`PATH` or at the + path given by the :envvar:`GIT_PYTHON_GIT_EXECUTABLE` environment variable.""" def __init__(self, command: Union[List[str], Tuple[str], str], cause: Union[str, Exception]) -> None: - super(GitCommandNotFound, self).__init__(command, cause) + super().__init__(command, cause) self._msg = "Cmd('%s') not found%s" @@ -115,22 +156,22 @@ class GitCommandError(CommandError): stderr: Union[bytes, str, None] = None, stdout: Union[bytes, str, None] = None, ) -> None: - super(GitCommandError, self).__init__(command, status, stderr, stdout) + super().__init__(command, status, stderr, stdout) class CheckoutError(GitError): """Thrown if a file could not be checked out from the index as it contained changes. - The .failed_files attribute contains a list of relative paths that failed - to be checked out as they contained changes that did not exist in the index. + The :attr:`failed_files` attribute contains a list of relative paths that failed to + be checked out as they contained changes that did not exist in the index. - The .failed_reasons attribute contains a string informing about the actual + The :attr:`failed_reasons` attribute contains a string informing about the actual cause of the issue. - The .valid_files attribute contains a list of relative paths to files that - were checked out successfully and hence match the version stored in the - index""" + The :attr:`valid_files` attribute contains a list of relative paths to files that + were checked out successfully and hence match the version stored in the index. 
+ """ def __init__( self, @@ -149,18 +190,20 @@ class CheckoutError(GitError): class CacheError(GitError): - - """Base for all errors related to the git index, which is called cache internally""" + """Base for all errors related to the git index, which is called "cache" + internally.""" class UnmergedEntriesError(CacheError): """Thrown if an operation cannot proceed as there are still unmerged - entries in the cache""" + entries in the cache.""" class HookExecutionError(CommandError): - """Thrown if a hook exits with a non-zero exit code. It provides access to the exit code and the string returned - via standard output""" + """Thrown if a hook exits with a non-zero exit code. + + This provides access to the exit code and the string returned via standard output. + """ def __init__( self, @@ -169,12 +212,13 @@ class HookExecutionError(CommandError): stderr: Union[bytes, str, None] = None, stdout: Union[bytes, str, None] = None, ) -> None: - super(HookExecutionError, self).__init__(command, status, stderr, stdout) + super().__init__(command, status, stderr, stdout) self._msg = "Hook('%s') failed%s" class RepositoryDirtyError(GitError): - """Thrown whenever an operation on a repository fails as it has uncommitted changes that would be overwritten""" + """Thrown whenever an operation on a repository fails as it has uncommitted changes + that would be overwritten.""" def __init__(self, repo: "Repo", message: str) -> None: self.repo = repo diff --git a/git/index/__init__.py b/git/index/__init__.py index 96b721f..ba48110 100644 --- a/git/index/__init__.py +++ b/git/index/__init__.py @@ -1,4 +1,16 @@ -"""Initialize the index package""" -# flake8: noqa -from .base import * -from .typ import * +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ + +"""Initialize the index package.""" + +__all__ = [ + "BaseIndexEntry", + "BlobFilter", + "CheckoutError", + "IndexEntry", + "IndexFile", + "StageType", 
+] + +from .base import CheckoutError, IndexFile +from .typ import BaseIndexEntry, BlobFilter, IndexEntry, StageType diff --git a/git/index/base.py b/git/index/base.py index 0cdeb1c..39cc914 100644 --- a/git/index/base.py +++ b/git/index/base.py @@ -1,32 +1,34 @@ -# index.py # Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors # -# This module is part of GitPython and is released under -# the BSD License: https://opensource.org/license/bsd-3-clause/ +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ -from contextlib import ExitStack +"""Module containing :class:`IndexFile`, an Index implementation facilitating all kinds +of index manipulations such as querying and merging.""" + +__all__ = ["IndexFile", "CheckoutError", "StageType"] + +import contextlib import datetime import glob from io import BytesIO import os +import os.path as osp from stat import S_ISLNK import subprocess +import sys import tempfile -from git.compat import ( - force_bytes, - defenc, -) -from git.exc import GitCommandError, CheckoutError, GitError, InvalidGitRepositoryError -from git.objects import ( - Blob, - Submodule, - Tree, - Object, - Commit, -) +from gitdb.base import IStream +from gitdb.db import MemoryDB + +from git.compat import defenc, force_bytes +import git.diff as git_diff +from git.exc import CheckoutError, GitCommandError, GitError, InvalidGitRepositoryError +from git.objects import Blob, Commit, Object, Submodule, Tree from git.objects.util import Serializable from git.util import ( + Actor, LazyMixin, LockedFD, join_path_native, @@ -35,27 +37,18 @@ from git.util import ( unbare_repo, to_bin_sha, ) -from gitdb.base import IStream -from gitdb.db import MemoryDB - -import git.diff as git_diff -import os.path as osp from .fun import ( + S_IFGITLINK, + aggressive_tree_merge, entry_key, - write_cache, read_cache, - aggressive_tree_merge, - write_tree_from_cache, - 
stat_mode_to_index_mode, - S_IFGITLINK, run_commit_hook, + stat_mode_to_index_mode, + write_cache, + write_tree_from_cache, ) -from .typ import ( - BaseIndexEntry, - IndexEntry, - StageType, -) +from .typ import BaseIndexEntry, IndexEntry, StageType from .util import TemporaryFileSwap, post_clear_cache, default_index, git_working_dir # typing ----------------------------------------------------------------------------- @@ -65,6 +58,7 @@ from typing import ( BinaryIO, Callable, Dict, + Generator, IO, Iterable, Iterator, @@ -73,17 +67,16 @@ from typing import ( Sequence, TYPE_CHECKING, Tuple, - Type, Union, ) -from git.types import Commit_ish, PathLike +from git.types import Literal, PathLike if TYPE_CHECKING: from subprocess import Popen - from git.repo import Repo + from git.refs.reference import Reference - from git.util import Actor + from git.repo import Repo Treeish = Union[Tree, Commit, str, bytes] @@ -91,42 +84,67 @@ Treeish = Union[Tree, Commit, str, bytes] # ------------------------------------------------------------------------------------ -__all__ = ("IndexFile", "CheckoutError", "StageType") - +@contextlib.contextmanager +def _named_temporary_file_for_subprocess(directory: PathLike) -> Generator[str, None, None]: + """Create a named temporary file git subprocesses can open, deleting it afterward. -class IndexFile(LazyMixin, git_diff.Diffable, Serializable): + :param directory: + The directory in which the file is created. + :return: + A context manager object that creates the file and provides its name on entry, + and deletes it on exit. """ - Implements an Index that can be manipulated using a native implementation in - order to save git command function calls wherever possible. 
+ if sys.platform == "win32": + fd, name = tempfile.mkstemp(dir=directory) + os.close(fd) + try: + yield name + finally: + os.remove(name) + else: + with tempfile.NamedTemporaryFile(dir=directory) as ctx: + yield ctx.name + + +class IndexFile(LazyMixin, git_diff.Diffable, Serializable): + """An Index that can be manipulated using a native implementation in order to save + git command function calls wherever possible. - It provides custom merging facilities allowing to merge without actually changing - your index or your working tree. This way you can perform own test-merges based - on the index only without having to deal with the working copy. This is useful - in case of partial working trees. + This provides custom merging facilities allowing to merge without actually changing + your index or your working tree. This way you can perform your own test merges based + on the index only without having to deal with the working copy. This is useful in + case of partial working trees. - ``Entries`` + Entries: - The index contains an entries dict whose keys are tuples of type IndexEntry - to facilitate access. + The index contains an entries dict whose keys are tuples of type + :class:`~git.index.typ.IndexEntry` to facilitate access. - You may read the entries dict or manipulate it using IndexEntry instance, i.e.:: + You may read the entries dict or manipulate it using IndexEntry instance, i.e.:: - index.entries[index.entry_key(index_entry_instance)] = index_entry_instance + index.entries[index.entry_key(index_entry_instance)] = index_entry_instance - Make sure you use index.write() once you are done manipulating the index directly - before operating on it using the git command""" + Make sure you use :meth:`index.write() <write>` once you are done manipulating the + index directly before operating on it using the git command. 
+ """ __slots__ = ("repo", "version", "entries", "_extension_data", "_file_path") - _VERSION = 2 # latest version we support - S_IFGITLINK = S_IFGITLINK # a submodule + + _VERSION = 2 + """The latest version we support.""" + + S_IFGITLINK = S_IFGITLINK + """Flags for a submodule.""" def __init__(self, repo: "Repo", file_path: Union[PathLike, None] = None) -> None: - """Initialize this Index instance, optionally from the given ``file_path``. - If no file_path is given, we will be created from the current index file. + """Initialize this Index instance, optionally from the given `file_path`. + + If no `file_path` is given, we will be created from the current index file. If a stream is not given, the stream will be initialized from the current - repository's index on demand.""" + repository's index on demand. + """ self.repo = repo self.version = self._VERSION self._extension_data = b"" @@ -137,9 +155,9 @@ class IndexFile(LazyMixin, git_diff.Diffable, Serializable): try: fd = os.open(self._file_path, os.O_RDONLY) except OSError: - # in new repositories, there may be no index, which means we are empty + # In new repositories, there may be no index, which means we are empty. self.entries: Dict[Tuple[PathLike, StageType], IndexEntry] = {} - return None + return # END exception handling try: @@ -149,7 +167,7 @@ class IndexFile(LazyMixin, git_diff.Diffable, Serializable): self._deserialize(stream) else: - super(IndexFile, self)._set_cache_(attr) + super()._set_cache_(attr) def _index_path(self) -> PathLike: if self.repo.git_dir: @@ -163,23 +181,24 @@ class IndexFile(LazyMixin, git_diff.Diffable, Serializable): return self._file_path def _delete_entries_cache(self) -> None: - """Safely clear the entries cache so it can be recreated""" + """Safely clear the entries cache so it can be recreated.""" try: del self.entries except AttributeError: - # fails in python 2.6.5 with this exception + # It failed in Python 2.6.5 with AttributeError. 
+ # FIXME: Look into whether we can just remove this except clause now. pass # END exception handling # { Serializable Interface def _deserialize(self, stream: IO) -> "IndexFile": - """Initialize this instance with index values read from the given stream""" + """Initialize this instance with index values read from the given stream.""" self.version, self.entries, self._extension_data, _conten_sha = read_cache(stream) return self def _entries_sorted(self) -> List[IndexEntry]: - """:return: list of entries, in a sorted fashion, first by path, then by stage""" + """:return: List of entries, in a sorted fashion, first by path, then by stage""" return sorted(self.entries.values(), key=lambda e: (e.path, e.stage)) def _serialize(self, stream: IO, ignore_extension_data: bool = False) -> "IndexFile": @@ -197,30 +216,26 @@ class IndexFile(LazyMixin, git_diff.Diffable, Serializable): file_path: Union[None, PathLike] = None, ignore_extension_data: bool = False, ) -> None: - """Write the current state to our file path or to the given one + """Write the current state to our file path or to the given one. :param file_path: - If None, we will write to our stored file path from which we have - been initialized. Otherwise we write to the given file path. - Please note that this will change the file_path of this index to - the one you gave. + If ``None``, we will write to our stored file path from which we have been + initialized. Otherwise we write to the given file path. Please note that + this will change the `file_path` of this index to the one you gave. :param ignore_extension_data: - If True, the TREE type extension data read in the index will not - be written to disk. NOTE that no extension data is actually written. - Use this if you have altered the index and - would like to use git-write-tree afterwards to create a tree - representing your written changes. - If this data is present in the written index, git-write-tree - will instead write the stored/cached tree. 
- Alternatively, use IndexFile.write_tree() to handle this case - automatically - - :return: self # does it? or returns None?""" - # make sure we have our entries read before getting a write lock - # else it would be done when streaming. This can happen - # if one doesn't change the index, but writes it right away - self.entries + If ``True``, the TREE type extension data read in the index will not be + written to disk. NOTE that no extension data is actually written. Use this + if you have altered the index and would like to use + :manpage:`git-write-tree(1)` afterwards to create a tree representing your + written changes. If this data is present in the written index, + :manpage:`git-write-tree(1)` will instead write the stored/cached tree. + Alternatively, use :meth:`write_tree` to handle this case automatically. + """ + # Make sure we have our entries read before getting a write lock. + # Otherwise it would be done when streaming. + # This can happen if one doesn't change the index, but writes it right away. + self.entries # noqa: B018 lfd = LockedFD(file_path or self._file_path) stream = lfd.open(write=True, stream=True) @@ -232,36 +247,36 @@ class IndexFile(LazyMixin, git_diff.Diffable, Serializable): lfd.commit() - # make sure we represent what we have written + # Make sure we represent what we have written. if file_path is not None: self._file_path = file_path @post_clear_cache @default_index def merge_tree(self, rhs: Treeish, base: Union[None, Treeish] = None) -> "IndexFile": - """Merge the given rhs treeish into the current index, possibly taking + """Merge the given `rhs` treeish into the current index, possibly taking a common base treeish into account. - As opposed to the :func:`IndexFile.from_tree` method, this allows you to use an already - existing tree as the left side of the merge + As opposed to the :func:`from_tree` method, this allows you to use an already + existing tree as the left side of the merge. 
:param rhs: - treeish reference pointing to the 'other' side of the merge. + Treeish reference pointing to the 'other' side of the merge. :param base: - optional treeish reference pointing to the common base of 'rhs' and - this index which equals lhs + Optional treeish reference pointing to the common base of `rhs` and this + index which equals lhs. :return: - self ( containing the merge and possibly unmerged entries in case of - conflicts ) - - :raise GitCommandError: - If there is a merge conflict. The error will - be raised at the first conflicting path. If you want to have proper - merge resolution to be done by yourself, you have to commit the changed - index ( or make a valid tree from it ) and retry with a three-way - index.from_tree call.""" + self (containing the merge and possibly unmerged entries in case of + conflicts) + + :raise git.exc.GitCommandError: + If there is a merge conflict. The error will be raised at the first + conflicting path. If you want to have proper merge resolution to be done by + yourself, you have to commit the changed index (or make a valid tree from + it) and retry with a three-way :meth:`index.from_tree <from_tree>` call. + """ # -i : ignore working tree status # --aggressive : handle more merge cases # -m : do an actual merge @@ -276,22 +291,25 @@ class IndexFile(LazyMixin, git_diff.Diffable, Serializable): @classmethod def new(cls, repo: "Repo", *tree_sha: Union[str, Tree]) -> "IndexFile": """Merge the given treeish revisions into a new index which is returned. - This method behaves like git-read-tree --aggressive when doing the merge. - :param repo: The repository treeish are located in. + This method behaves like ``git-read-tree --aggressive`` when doing the merge. + + :param repo: + The repository treeish are located in. :param tree_sha: - 20 byte or 40 byte tree sha or tree objects + 20 byte or 40 byte tree sha or tree objects. :return: - New IndexFile instance. Its path will be undefined. 
- If you intend to write such a merged Index, supply an alternate file_path - to its 'write' method.""" + New :class:`IndexFile` instance. Its path will be undefined. + If you intend to write such a merged Index, supply an alternate + ``file_path`` to its :meth:`write` method. + """ tree_sha_bytes: List[bytes] = [to_bin_sha(str(t)) for t in tree_sha] base_entries = aggressive_tree_merge(repo.odb, tree_sha_bytes) inst = cls(repo) - # convert to entries dict + # Convert to entries dict. entries: Dict[Tuple[PathLike, int], IndexEntry] = dict( zip( ((e.path, e.stage) for e in base_entries), @@ -304,78 +322,84 @@ class IndexFile(LazyMixin, git_diff.Diffable, Serializable): @classmethod def from_tree(cls, repo: "Repo", *treeish: Treeish, **kwargs: Any) -> "IndexFile": - """Merge the given treeish revisions into a new index which is returned. - The original index will remain unaltered + R"""Merge the given treeish revisions into a new index which is returned. + The original index will remain unaltered. :param repo: The repository treeish are located in. :param treeish: - One, two or three Tree Objects, Commits or 40 byte hexshas. The result - changes according to the amount of trees. - If 1 Tree is given, it will just be read into a new index - If 2 Trees are given, they will be merged into a new index using a - two way merge algorithm. Tree 1 is the 'current' tree, tree 2 is the 'other' - one. It behaves like a fast-forward. - If 3 Trees are given, a 3-way merge will be performed with the first tree - being the common ancestor of tree 2 and tree 3. Tree 2 is the 'current' tree, - tree 3 is the 'other' one + One, two or three :class:`~git.objects.tree.Tree` objects, + :class:`~git.objects.commit.Commit`\s or 40 byte hexshas. + + The result changes according to the amount of trees: + + 1. If 1 Tree is given, it will just be read into a new index. + 2. If 2 Trees are given, they will be merged into a new index using a two + way merge algorithm. 
Tree 1 is the 'current' tree, tree 2 is the 'other' + one. It behaves like a fast-forward. + 3. If 3 Trees are given, a 3-way merge will be performed with the first tree + being the common ancestor of tree 2 and tree 3. Tree 2 is the 'current' + tree, tree 3 is the 'other' one. :param kwargs: - Additional arguments passed to git-read-tree + Additional arguments passed to :manpage:`git-read-tree(1)`. :return: - New IndexFile instance. It will point to a temporary index location which - does not exist anymore. If you intend to write such a merged Index, supply - an alternate file_path to its 'write' method. + New :class:`IndexFile` instance. It will point to a temporary index location + which does not exist anymore. If you intend to write such a merged Index, + supply an alternate ``file_path`` to its :meth:`write` method. :note: - In the three-way merge case, --aggressive will be specified to automatically - resolve more cases in a commonly correct manner. Specify trivial=True as kwarg - to override that. + In the three-way merge case, ``--aggressive`` will be specified to + automatically resolve more cases in a commonly correct manner. Specify + ``trivial=True`` as a keyword argument to override that. - As the underlying git-read-tree command takes into account the current index, - it will be temporarily moved out of the way to assure there are no unsuspected - interferences.""" + As the underlying :manpage:`git-read-tree(1)` command takes into account the + current index, it will be temporarily moved out of the way to prevent any + unexpected interference. + """ if len(treeish) == 0 or len(treeish) > 3: raise ValueError("Please specify between 1 and 3 treeish, got %i" % len(treeish)) arg_list: List[Union[Treeish, str]] = [] - # ignore that working tree and index possibly are out of date + # Ignore that the working tree and index possibly are out of date. 
if len(treeish) > 1: - # drop unmerged entries when reading our index and merging + # Drop unmerged entries when reading our index and merging. arg_list.append("--reset") - # handle non-trivial cases the way a real merge does + # Handle non-trivial cases the way a real merge does. arg_list.append("--aggressive") # END merge handling - # tmp file created in git home directory to be sure renaming - # works - /tmp/ dirs could be on another device - with ExitStack() as stack: - tmp_index = stack.enter_context(tempfile.NamedTemporaryFile(dir=repo.git_dir)) - arg_list.append("--index-output=%s" % tmp_index.name) + # Create the temporary file in the .git directory to be sure renaming + # works - /tmp/ directories could be on another device. + with _named_temporary_file_for_subprocess(repo.git_dir) as tmp_index: + arg_list.append("--index-output=%s" % tmp_index) arg_list.extend(treeish) - # move current index out of the way - otherwise the merge may fail - # as it considers existing entries. moving it essentially clears the index. + # Move the current index out of the way - otherwise the merge may fail as it + # considers existing entries. Moving it essentially clears the index. # Unfortunately there is no 'soft' way to do it. - # The TemporaryFileSwap assure the original file get put back - - stack.enter_context(TemporaryFileSwap(join_path_native(repo.git_dir, "index"))) - repo.git.read_tree(*arg_list, **kwargs) - index = cls(repo, tmp_index.name) - index.entries # force it to read the file as we will delete the temp-file - return index + # The TemporaryFileSwap ensures the original file gets put back. + with TemporaryFileSwap(join_path_native(repo.git_dir, "index")): + repo.git.read_tree(*arg_list, **kwargs) + index = cls(repo, tmp_index) + index.entries # noqa: B018 # Force it to read the file as we will delete the temp-file. 
+ return index # END index merge handling # UTILITIES + @unbare_repo def _iter_expand_paths(self: "IndexFile", paths: Sequence[PathLike]) -> Iterator[PathLike]: - """Expand the directories in list of paths to the corresponding paths accordingly, + """Expand the directories in list of paths to the corresponding paths + accordingly. - Note: git will add items multiple times even if a glob overlapped - with manually specified paths or if paths where specified multiple - times - we respect that and do not prune""" + :note: + git will add items multiple times even if a glob overlapped with manually + specified paths or if paths where specified multiple times - we respect that + and do not prune. + """ def raise_exc(e: Exception) -> NoReturn: raise e @@ -389,25 +413,25 @@ class IndexFile(LazyMixin, git_diff.Diffable, Serializable): # END make absolute path try: - st = os.lstat(abs_path) # handles non-symlinks as well + st = os.lstat(abs_path) # Handles non-symlinks as well. except OSError: - # the lstat call may fail as the path may contain globs as well + # The lstat call may fail as the path may contain globs as well. pass else: if S_ISLNK(st.st_mode): yield abs_path.replace(rs, "") continue - # end check symlink + # END check symlink - # if the path is not already pointing to an existing file, resolve globs if possible + # If the path is not already pointing to an existing file, resolve globs if possible. if not os.path.exists(abs_path) and ("?" in abs_path or "*" in abs_path or "[" in abs_path): resolved_paths = glob.glob(abs_path) # not abs_path in resolved_paths: - # a glob() resolving to the same path we are feeding it with - # is a glob() that failed to resolve. If we continued calling - # ourselves we'd endlessly recurse. If the condition below - # evaluates to true then we are likely dealing with a file - # whose name contains wildcard characters. + # A glob() resolving to the same path we are feeding it with is a + # glob() that failed to resolve. 
If we continued calling ourselves + # we'd endlessly recurse. If the condition below evaluates to true + # then we are likely dealing with a file whose name contains wildcard + # characters. if abs_path not in resolved_paths: for f in self._iter_expand_paths(glob.glob(abs_path)): yield str(f).replace(rs, "") @@ -416,12 +440,12 @@ class IndexFile(LazyMixin, git_diff.Diffable, Serializable): try: for root, _dirs, files in os.walk(abs_path, onerror=raise_exc): for rela_file in files: - # add relative paths only + # Add relative paths only. yield osp.join(root.replace(rs, ""), rela_file) # END for each file in subdir # END for each subdirectory except OSError: - # was a file or something that could not be iterated + # It was a file or something that could not be iterated. yield abs_path.replace(rs, "") # END path exception handling # END for each path @@ -435,20 +459,28 @@ class IndexFile(LazyMixin, git_diff.Diffable, Serializable): fprogress: Callable[[PathLike, bool, PathLike], None], read_from_stdout: bool = True, ) -> Union[None, str]: - """Write path to proc.stdin and make sure it processes the item, including progress. - - :return: stdout string - :param read_from_stdout: if True, proc.stdout will be read after the item - was sent to stdin. In that case, it will return None - :note: There is a bug in git-update-index that prevents it from sending - reports just in time. This is why we have a version that tries to - read stdout and one which doesn't. In fact, the stdout is not - important as the piped-in files are processed anyway and just in time - :note: Newlines are essential here, gits behaviour is somewhat inconsistent - on this depending on the version, hence we try our best to deal with - newlines carefully. Usually the last newline will not be sent, instead - we will close stdin to break the pipe.""" + """Write path to ``proc.stdin`` and make sure it processes the item, including + progress. 
+ + :return: + stdout string + + :param read_from_stdout: + If ``True``, ``proc.stdout`` will be read after the item was sent to stdin. + In that case, it will return ``None``. + + :note: + There is a bug in :manpage:`git-update-index(1)` that prevents it from + sending reports just in time. This is why we have a version that tries to + read stdout and one which doesn't. In fact, the stdout is not important as + the piped-in files are processed anyway and just in time. + :note: + Newlines are essential here, git's behaviour is somewhat inconsistent on + this depending on the version, hence we try our best to deal with newlines + carefully. Usually the last newline will not be sent, instead we will close + stdin to break the pipe. + """ fprogress(filepath, False, item) rval: Union[None, str] = None @@ -456,7 +488,7 @@ class IndexFile(LazyMixin, git_diff.Diffable, Serializable): try: proc.stdin.write(("%s\n" % filepath).encode(defenc)) except IOError as e: - # pipe broke, usually because some error happened + # Pipe broke, usually because some error happened. raise fmakeexc() from e # END write exception handling proc.stdin.flush() @@ -470,12 +502,15 @@ class IndexFile(LazyMixin, git_diff.Diffable, Serializable): self, predicate: Callable[[Tuple[StageType, Blob]], bool] = lambda t: True ) -> Iterator[Tuple[StageType, Blob]]: """ - :return: Iterator yielding tuples of Blob objects and stages, tuple(stage, Blob) + :return: + Iterator yielding tuples of :class:`~git.objects.blob.Blob` objects and + stages, tuple(stage, Blob). :param predicate: - Function(t) returning True if tuple(stage, Blob) should be yielded by the - iterator. A default filter, the BlobFilter, allows you to yield blobs - only if they match a given list of paths.""" + Function(t) returning ``True`` if tuple(stage, Blob) should be yielded by + the iterator. A default filter, the `~git.index.typ.BlobFilter`, allows you + to yield blobs only if they match a given list of paths. 
+ """ for entry in self.entries.values(): blob = entry.to_blob(self.repo) blob.size = entry.size @@ -487,15 +522,13 @@ class IndexFile(LazyMixin, git_diff.Diffable, Serializable): def unmerged_blobs(self) -> Dict[PathLike, List[Tuple[StageType, Blob]]]: """ :return: - Dict(path : list( tuple( stage, Blob, ...))), being - a dictionary associating a path in the index with a list containing - sorted stage/blob pairs - + Dict(path : list(tuple(stage, Blob, ...))), being a dictionary associating a + path in the index with a list containing sorted stage/blob pairs. :note: - Blobs that have been removed in one side simply do not exist in the - given stage. I.e. a file removed on the 'other' branch whose entries - are at stage 3 will not have a stage 3 entry. + Blobs that have been removed in one side simply do not exist in the given + stage. That is, a file removed on the 'other' branch whose entries are at + stage 3 will not have a stage 3 entry. """ is_unmerged_blob = lambda t: t[0] != 0 path_map: Dict[PathLike, List[Tuple[StageType, Blob]]] = {} @@ -512,19 +545,23 @@ class IndexFile(LazyMixin, git_diff.Diffable, Serializable): return entry_key(*entry) def resolve_blobs(self, iter_blobs: Iterator[Blob]) -> "IndexFile": - """Resolve the blobs given in blob iterator. This will effectively remove the - index entries of the respective path at all non-null stages and add the given - blob as new stage null blob. + """Resolve the blobs given in blob iterator. + + This will effectively remove the index entries of the respective path at all + non-null stages and add the given blob as new stage null blob. - For each path there may only be one blob, otherwise a ValueError will be raised - claiming the path is already at stage 0. + For each path there may only be one blob, otherwise a :exc:`ValueError` will be + raised claiming the path is already at stage 0. 
- :raise ValueError: if one of the blobs already existed at stage 0 - :return: self + :raise ValueError: + If one of the blobs already existed at stage 0. + + :return: + self :note: You will have to write the index manually once you are done, i.e. - index.resolve_blobs(blobs).write() + ``index.resolve_blobs(blobs).write()``. """ for blob in iter_blobs: stage_null_key = (blob.path, 0) @@ -532,7 +569,7 @@ class IndexFile(LazyMixin, git_diff.Diffable, Serializable): raise ValueError("Path %r already exists at stage 0" % str(blob.path)) # END assert blob is not stage 0 already - # delete all possible stages + # Delete all possible stages. for stage in (1, 2, 3): try: del self.entries[(blob.path, stage)] @@ -550,42 +587,53 @@ class IndexFile(LazyMixin, git_diff.Diffable, Serializable): """Reread the contents of our index file, discarding all cached information we might have. - :note: This is a possibly dangerious operations as it will discard your changes - to index.entries - :return: self""" + :note: + This is a possibly dangerous operations as it will discard your changes to + :attr:`index.entries <entries>`. + + :return: + self + """ self._delete_entries_cache() - # allows to lazily reread on demand + # Allows to lazily reread on demand. return self def write_tree(self) -> Tree: - """Writes this index to a corresponding Tree object into the repository's - object database and return it. - - :return: Tree object representing this index - :note: The tree will be written even if one or more objects the tree refers to - does not yet exist in the object database. This could happen if you added - Entries to the index directly. 
- :raise ValueError: if there are no entries in the cache - :raise UnmergedEntriesError:""" - # we obtain no lock as we just flush our contents to disk as tree - # If we are a new index, the entries access will load our data accordingly + """Write this index to a corresponding :class:`~git.objects.tree.Tree` object + into the repository's object database and return it. + + :return: + :class:`~git.objects.tree.Tree` object representing this index. + + :note: + The tree will be written even if one or more objects the tree refers to does + not yet exist in the object database. This could happen if you added entries + to the index directly. + + :raise ValueError: + If there are no entries in the cache. + + :raise git.exc.UnmergedEntriesError: + """ + # We obtain no lock as we just flush our contents to disk as tree. + # If we are a new index, the entries access will load our data accordingly. mdb = MemoryDB() entries = self._entries_sorted() binsha, tree_items = write_tree_from_cache(entries, mdb, slice(0, len(entries))) - # copy changed trees only + # Copy changed trees only. mdb.stream_copy(mdb.sha_iter(), self.repo.odb) - # note: additional deserialization could be saved if write_tree_from_cache - # would return sorted tree entries + # Note: Additional deserialization could be saved if write_tree_from_cache would + # return sorted tree entries. 
root_tree = Tree(self.repo, binsha, path="") root_tree._cache = tree_items return root_tree def _process_diff_args( - self, # type: ignore[override] - args: List[Union[str, "git_diff.Diffable", Type["git_diff.Diffable.Index"]]], - ) -> List[Union[str, "git_diff.Diffable", Type["git_diff.Diffable.Index"]]]: + self, + args: List[Union[PathLike, "git_diff.Diffable"]], + ) -> List[Union[PathLike, "git_diff.Diffable"]]: try: args.pop(args.index(self)) except IndexError: @@ -595,18 +643,22 @@ class IndexFile(LazyMixin, git_diff.Diffable, Serializable): def _to_relative_path(self, path: PathLike) -> PathLike: """ - :return: Version of path relative to our git directory or raise ValueError - if it is not within our git directory""" + :return: + Version of path relative to our git directory or raise :exc:`ValueError` if + it is not within our git directory. + + :raise ValueError: + """ if not osp.isabs(path): return path if self.repo.bare: raise InvalidGitRepositoryError("require non-bare repository") - if not str(path).startswith(str(self.repo.working_tree_dir)): + if not osp.normpath(str(path)).startswith(str(self.repo.working_tree_dir)): raise ValueError("Absolute path %r is not in git repository at %r" % (path, self.repo.working_tree_dir)) return os.path.relpath(path, self.repo.working_tree_dir) def _preprocess_add_items( - self, items: Sequence[Union[PathLike, Blob, BaseIndexEntry, "Submodule"]] + self, items: Union[PathLike, Sequence[Union[PathLike, Blob, BaseIndexEntry, "Submodule"]]] ) -> Tuple[List[PathLike], List[BaseIndexEntry]]: """Split the items into two lists of path strings and BaseEntries.""" paths = [] @@ -628,11 +680,16 @@ class IndexFile(LazyMixin, git_diff.Diffable, Serializable): return paths, entries def _store_path(self, filepath: PathLike, fprogress: Callable) -> BaseIndexEntry: - """Store file at filepath in the database and return the base index entry - Needs the git_working_dir decorator active ! 
This must be assured in the calling code""" - st = os.lstat(filepath) # handles non-symlinks as well + """Store file at filepath in the database and return the base index entry. + + :note: + This needs the :func:`~git.index.util.git_working_dir` decorator active! + This must be ensured in the calling code. + """ + st = os.lstat(filepath) # Handles non-symlinks as well. if S_ISLNK(st.st_mode): - # in PY3, readlink is string, but we need bytes. In PY2, it's just OS encoded bytes, we assume UTF-8 + # In PY3, readlink is a string, but we need bytes. + # In PY2, it was just OS encoded bytes, we assumed UTF-8. open_stream: Callable[[], BinaryIO] = lambda: BytesIO(force_bytes(os.readlink(filepath), encoding=defenc)) else: open_stream = lambda: open(filepath, "rb") @@ -668,7 +725,7 @@ class IndexFile(LazyMixin, git_diff.Diffable, Serializable): gitrelative_path = path if self.repo.working_tree_dir: abspath = osp.join(self.repo.working_tree_dir, gitrelative_path) - # end obtain relative and absolute paths + # END obtain relative and absolute paths blob = Blob( self.repo, @@ -692,15 +749,15 @@ class IndexFile(LazyMixin, git_diff.Diffable, Serializable): def add( self, - items: Sequence[Union[PathLike, Blob, BaseIndexEntry, "Submodule"]], + items: Union[PathLike, Sequence[Union[PathLike, Blob, BaseIndexEntry, "Submodule"]]], force: bool = True, fprogress: Callable = lambda *args: None, path_rewriter: Union[Callable[..., PathLike], None] = None, write: bool = True, write_extension_data: bool = False, ) -> List[BaseIndexEntry]: - """Add files from the working tree, specific blobs or BaseIndexEntries - to the index. + R"""Add files from the working tree, specific blobs, or + :class:`~git.index.typ.BaseIndexEntry`\s to the index. :param items: Multiple types of items are supported, types can be mixed within one call. @@ -708,103 +765,122 @@ class IndexFile(LazyMixin, git_diff.Diffable, Serializable): relative or absolute. 
- path string - strings denote a relative or absolute path into the repository pointing to - an existing file, i.e. CHANGES, lib/myfile.ext, '/home/gitrepo/lib/myfile.ext'. - Absolute paths must start with working tree directory of this index's repository - to be considered valid. For example, if it was initialized with a non-normalized path, like - `/root/repo/../repo`, absolute paths to be added must start with `/root/repo/../repo`. + Strings denote a relative or absolute path into the repository pointing + to an existing file, e.g., ``CHANGES``, `lib/myfile.ext``, + ``/home/gitrepo/lib/myfile.ext``. + + Absolute paths must start with working tree directory of this index's + repository to be considered valid. For example, if it was initialized + with a non-normalized path, like ``/root/repo/../repo``, absolute paths + to be added must start with ``/root/repo/../repo``. Paths provided like this must exist. When added, they will be written into the object database. - PathStrings may contain globs, such as 'lib/__init__*' or can be directories - like 'lib', the latter ones will add all the files within the directory and - subdirectories. + PathStrings may contain globs, such as ``lib/__init__*``. Or they can be + directories like ``lib``, which will add all the files within the + directory and subdirectories. - This equals a straight git-add. + This equals a straight :manpage:`git-add(1)`. - They are added at stage 0 + They are added at stage 0. + + - :class:~`git.objects.blob.Blob` or + :class:`~git.objects.submodule.base.Submodule` object - - Blob or Submodule object Blobs are added as they are assuming a valid mode is set. - The file they refer to may or may not exist in the file system, but - must be a path relative to our repository. - If their sha is null ( 40*0 ), their path must exist in the file system - relative to the git repository as an object will be created from - the data at the path. 
- The handling now very much equals the way string paths are processed, except that - the mode you have set will be kept. This allows you to create symlinks - by settings the mode respectively and writing the target of the symlink - directly into the file. This equals a default Linux-Symlink which - is not dereferenced automatically, except that it can be created on - filesystems not supporting it as well. + The file they refer to may or may not exist in the file system, but must + be a path relative to our repository. + + If their sha is null (40*0), their path must exist in the file system + relative to the git repository as an object will be created from the + data at the path. - Please note that globs or directories are not allowed in Blob objects. + The handling now very much equals the way string paths are processed, + except that the mode you have set will be kept. This allows you to + create symlinks by settings the mode respectively and writing the target + of the symlink directly into the file. This equals a default Linux + symlink which is not dereferenced automatically, except that it can be + created on filesystems not supporting it as well. - They are added at stage 0 + Please note that globs or directories are not allowed in + :class:`~git.objects.blob.Blob` objects. - - BaseIndexEntry or type - Handling equals the one of Blob objects, but the stage may be - explicitly set. Please note that Index Entries require binary sha's. + They are added at stage 0. + + - :class:`~git.index.typ.BaseIndexEntry` or type + + Handling equals the one of :class:~`git.objects.blob.Blob` objects, but + the stage may be explicitly set. Please note that Index Entries require + binary sha's. :param force: **CURRENTLY INEFFECTIVE** - If True, otherwise ignored or excluded files will be - added anyway. - As opposed to the git-add command, we enable this flag by default - as the API user usually wants the item to be added even though - they might be excluded. 
+ If ``True``, otherwise ignored or excluded files will be added anyway. As + opposed to the :manpage:`git-add(1)` command, we enable this flag by default + as the API user usually wants the item to be added even though they might be + excluded. :param fprogress: - Function with signature f(path, done=False, item=item) called for each - path to be added, one time once it is about to be added where done==False - and once after it was added where done=True. - item is set to the actual item we handle, either a Path or a BaseIndexEntry - Please note that the processed path is not guaranteed to be present - in the index already as the index is currently being processed. + Function with signature ``f(path, done=False, item=item)`` called for each + path to be added, one time once it is about to be added where ``done=False`` + and once after it was added where ``done=True``. + + ``item`` is set to the actual item we handle, either a path or a + :class:`~git.index.typ.BaseIndexEntry`. + + Please note that the processed path is not guaranteed to be present in the + index already as the index is currently being processed. :param path_rewriter: - Function with signature (string) func(BaseIndexEntry) function returning a path + Function, with signature ``(string) func(BaseIndexEntry)``, returning a path for each passed entry which is the path to be actually recorded for the - object created from entry.path. This allows you to write an index which - is not identical to the layout of the actual files on your hard-disk. - If not None and ``items`` contain plain paths, these paths will be - converted to Entries beforehand and passed to the path_rewriter. - Please note that entry.path is relative to the git repository. + object created from :attr:`entry.path <git.index.typ.BaseIndexEntry.path>`. + This allows you to write an index which is not identical to the layout of + the actual files on your hard-disk. 
If not ``None`` and `items` contain + plain paths, these paths will be converted to Entries beforehand and passed + to the path_rewriter. Please note that ``entry.path`` is relative to the git + repository. :param write: - If True, the index will be written once it was altered. Otherwise - the changes only exist in memory and are not available to git commands. + If ``True``, the index will be written once it was altered. Otherwise the + changes only exist in memory and are not available to git commands. :param write_extension_data: - If True, extension data will be written back to the index. This can lead to issues in case - it is containing the 'TREE' extension, which will cause the `git commit` command to write an - old tree, instead of a new one representing the now changed index. - This doesn't matter if you use `IndexFile.commit()`, which ignores the `TREE` extension altogether. - You should set it to True if you intend to use `IndexFile.commit()` exclusively while maintaining - support for third-party extensions. Besides that, you can usually safely ignore the built-in - extensions when using GitPython on repositories that are not handled manually at all. + If ``True``, extension data will be written back to the index. This can lead + to issues in case it is containing the 'TREE' extension, which will cause + the :manpage:`git-commit(1)` command to write an old tree, instead of a new + one representing the now changed index. + + This doesn't matter if you use :meth:`IndexFile.commit`, which ignores the + 'TREE' extension altogether. You should set it to ``True`` if you intend to + use :meth:`IndexFile.commit` exclusively while maintaining support for + third-party extensions. Besides that, you can usually safely ignore the + built-in extensions when using GitPython on repositories that are not + handled manually at all. 
+ All current built-in extensions are listed here: - http://opensource.apple.com/source/Git/Git-26/src/git-htmldocs/technical/index-format.txt + https://git-scm.com/docs/index-format :return: - List(BaseIndexEntries) representing the entries just actually added. + List of :class:`~git.index.typ.BaseIndexEntry`\s representing the entries + just actually added. :raise OSError: - if a supplied Path did not exist. Please note that BaseIndexEntry - Objects that do not have a null sha will be added even if their paths - do not exist. + If a supplied path did not exist. Please note that + :class:`~git.index.typ.BaseIndexEntry` objects that do not have a null sha + will be added even if their paths do not exist. """ - # sort the entries into strings and Entries, Blobs are converted to entries - # automatically - # paths can be git-added, for everything else we use git-update-index + # Sort the entries into strings and Entries. + # Blobs are converted to entries automatically. + # Paths can be git-added. For everything else we use git-update-index. paths, entries = self._preprocess_add_items(items) entries_added: List[BaseIndexEntry] = [] - # This code needs a working tree, therefore we try not to run it unless required. + # This code needs a working tree, so we try not to run it unless required. # That way, we are OK on a bare repository as well. - # If there are no paths, the rewriter has nothing to do either + # If there are no paths, the rewriter has nothing to do either. if paths: entries_added.extend(self._entries_for_paths(paths, path_rewriter, fprogress, entries)) @@ -818,7 +894,7 @@ class IndexFile(LazyMixin, git_diff.Diffable, Serializable): # END null mode should be remove # HANDLE ENTRY OBJECT CREATION - # create objects if required, otherwise go with the existing shas + # Create objects if required, otherwise go with the existing shas. 
null_entries_indices = [i for i, e in enumerate(entries) if e.binsha == Object.NULL_BIN_SHA] if null_entries_indices: @@ -828,7 +904,7 @@ class IndexFile(LazyMixin, git_diff.Diffable, Serializable): null_entry = entries[ei] new_entry = self._store_path(null_entry.path, fprogress) - # update null entry + # Update null entry. entries[ei] = BaseIndexEntry( ( null_entry.mode, @@ -839,20 +915,21 @@ class IndexFile(LazyMixin, git_diff.Diffable, Serializable): ) # END for each entry index - # end closure + # END closure + handle_null_entries(self) # END null_entry handling # REWRITE PATHS - # If we have to rewrite the entries, do so now, after we have generated - # all object sha's + # If we have to rewrite the entries, do so now, after we have generated all + # object sha's. if path_rewriter: for i, e in enumerate(entries): entries[i] = BaseIndexEntry((e.mode, e.binsha, e.stage, path_rewriter(e))) # END for each entry # END handle path rewriting - # just go through the remaining entries and provide progress info + # Just go through the remaining entries and provide progress info. for i, entry in enumerate(entries): progress_sent = i in null_entries_indices if not progress_sent: @@ -864,7 +941,7 @@ class IndexFile(LazyMixin, git_diff.Diffable, Serializable): # END if there are base entries # FINALIZE - # add the new entries to this instance + # Add the new entries to this instance. for entry in entries_added: self.entries[(entry.path, 0)] = IndexEntry.from_base(entry) @@ -879,16 +956,16 @@ class IndexFile(LazyMixin, git_diff.Diffable, Serializable): items: Union[PathLike, Sequence[Union[PathLike, BaseIndexEntry, Blob, Submodule]]], ) -> List[PathLike]: """Returns a list of repo-relative paths from the given items which - may be absolute or relative paths, entries or blobs""" + may be absolute or relative paths, entries or blobs.""" paths = [] - # if string put in list + # If string, put in list. 
if isinstance(items, (str, os.PathLike)): items = [items] for item in items: if isinstance(item, (BaseIndexEntry, (Blob, Submodule))): paths.append(self._to_relative_path(item.path)) - elif isinstance(item, str): + elif isinstance(item, (str, os.PathLike)): paths.append(self._to_relative_path(item)) else: raise TypeError("Invalid item type: %r" % item) @@ -899,55 +976,59 @@ class IndexFile(LazyMixin, git_diff.Diffable, Serializable): @default_index def remove( self, - items: Sequence[Union[PathLike, Blob, BaseIndexEntry, "Submodule"]], + items: Union[PathLike, Sequence[Union[PathLike, Blob, BaseIndexEntry, "Submodule"]]], working_tree: bool = False, **kwargs: Any, ) -> List[str]: - """Remove the given items from the index and optionally from - the working tree as well. + R"""Remove the given items from the index and optionally from the working tree + as well. :param items: Multiple types of items are supported which may be be freely mixed. - path string + Remove the given path at all stages. If it is a directory, you must - specify the r=True keyword argument to remove all file entries - below it. If absolute paths are given, they will be converted - to a path relative to the git repository directory containing - the working tree + specify the ``r=True`` keyword argument to remove all file entries below + it. If absolute paths are given, they will be converted to a path + relative to the git repository directory containing the working tree + + The path string may include globs, such as ``*.c``. - The path string may include globs, such as \\*.c. + - :class:~`git.objects.blob.Blob` object - - Blob Object Only the path portion is used in this case. - - BaseIndexEntry or compatible type - The only relevant information here Yis the path. The stage is ignored. + - :class:`~git.index.typ.BaseIndexEntry` or compatible type + + The only relevant information here is the path. The stage is ignored. 
:param working_tree: - If True, the entry will also be removed from the working tree, physically - removing the respective file. This may fail if there are uncommitted changes - in it. + If ``True``, the entry will also be removed from the working tree, + physically removing the respective file. This may fail if there are + uncommitted changes in it. :param kwargs: - Additional keyword arguments to be passed to git-rm, such - as 'r' to allow recursive removal of + Additional keyword arguments to be passed to :manpage:`git-rm(1)`, such as + ``r`` to allow recursive removal. :return: - List(path_string, ...) list of repository relative paths that have - been removed effectively. - This is interesting to know in case you have provided a directory or - globs. Paths are relative to the repository.""" + List(path_string, ...) list of repository relative paths that have been + removed effectively. + + This is interesting to know in case you have provided a directory or globs. + Paths are relative to the repository. + """ args = [] if not working_tree: args.append("--cached") args.append("--") - # preprocess paths + # Preprocess paths. paths = self._items_to_rela_paths(items) removed_paths = self.repo.git.rm(args, paths, **kwargs).splitlines() - # process output to gain proper paths + # Process output to gain proper paths. # rm 'path' return [p[4:-1] for p in removed_paths] @@ -955,33 +1036,44 @@ class IndexFile(LazyMixin, git_diff.Diffable, Serializable): @default_index def move( self, - items: Sequence[Union[PathLike, Blob, BaseIndexEntry, "Submodule"]], + items: Union[PathLike, Sequence[Union[PathLike, Blob, BaseIndexEntry, "Submodule"]]], skip_errors: bool = False, **kwargs: Any, ) -> List[Tuple[str, str]]: """Rename/move the items, whereas the last item is considered the destination of - the move operation. If the destination is a file, the first item ( of two ) - must be a file as well. 
If the destination is a directory, it may be preceded - by one or more directories or files. + the move operation. + + If the destination is a file, the first item (of two) must be a file as well. + + If the destination is a directory, it may be preceded by one or more directories + or files. The working tree will be affected in non-bare repositories. - :parma items: - Multiple types of items are supported, please see the 'remove' method + :param items: + Multiple types of items are supported, please see the :meth:`remove` method for reference. + :param skip_errors: - If True, errors such as ones resulting from missing source files will - be skipped. + If ``True``, errors such as ones resulting from missing source files will be + skipped. + :param kwargs: - Additional arguments you would like to pass to git-mv, such as dry_run - or force. + Additional arguments you would like to pass to :manpage:`git-mv(1)`, such as + ``dry_run`` or ``force``. - :return: List(tuple(source_path_string, destination_path_string), ...) - A list of pairs, containing the source file moved as well as its - actual destination. Relative to the repository root. + :return: + List(tuple(source_path_string, destination_path_string), ...) + + A list of pairs, containing the source file moved as well as its actual + destination. Relative to the repository root. - :raise ValueError: If only one item was given - :raise GitCommandError: If git could not handle your request""" + :raise ValueError: + If only one item was given. + + :raise git.exc.GitCommandError: + If git could not handle your request. 
+ """ args = [] if skip_errors: args.append("-k") @@ -993,13 +1085,13 @@ class IndexFile(LazyMixin, git_diff.Diffable, Serializable): was_dry_run = kwargs.pop("dry_run", kwargs.pop("n", None)) kwargs["dry_run"] = True - # first execute rename in dryrun so the command tells us what it actually does - # ( for later output ) + # First execute rename in dry run so the command tells us what it actually does + # (for later output). out = [] mvlines = self.repo.git.mv(args, paths, **kwargs).splitlines() - # parse result - first 0:n/2 lines are 'checking ', the remaining ones - # are the 'renaming' ones which we parse + # Parse result - first 0:n/2 lines are 'checking ', the remaining ones are the + # 'renaming' ones which we parse. for ln in range(int(len(mvlines) / 2), len(mvlines)): tokens = mvlines[ln].split(" to ") assert len(tokens) == 2, "Too many tokens in %s" % mvlines[ln] @@ -1009,12 +1101,12 @@ class IndexFile(LazyMixin, git_diff.Diffable, Serializable): out.append((tokens[0][9:], tokens[1])) # END for each line to parse - # either prepare for the real run, or output the dry-run result + # Either prepare for the real run, or output the dry-run result. if was_dry_run: return out - # END handle dryrun + # END handle dry run - # now apply the actual operation + # Now apply the actual operation. kwargs.pop("dry_run") self.repo.git.mv(args, paths, **kwargs) @@ -1023,22 +1115,31 @@ class IndexFile(LazyMixin, git_diff.Diffable, Serializable): def commit( self, message: str, - parent_commits: Union[Commit_ish, None] = None, + parent_commits: Union[List[Commit], None] = None, head: bool = True, - author: Union[None, "Actor"] = None, - committer: Union[None, "Actor"] = None, + author: Union[None, Actor] = None, + committer: Union[None, Actor] = None, author_date: Union[datetime.datetime, str, None] = None, commit_date: Union[datetime.datetime, str, None] = None, skip_hooks: bool = False, ) -> Commit: - """Commit the current default index file, creating a commit object. 
- For more information on the arguments, see Commit.create_from_tree(). - - :note: If you have manually altered the .entries member of this instance, - don't forget to write() your changes to disk beforehand. - Passing skip_hooks=True is the equivalent of using `-n` - or `--no-verify` on the command line. - :return: Commit object representing the new commit""" + """Commit the current default index file, creating a + :class:`~git.objects.commit.Commit` object. + + For more information on the arguments, see + :meth:`Commit.create_from_tree <git.objects.commit.Commit.create_from_tree>`. + + :note: + If you have manually altered the :attr:`entries` member of this instance, + don't forget to :meth:`write` your changes to disk beforehand. + + :note: + Passing ``skip_hooks=True`` is the equivalent of using ``-n`` or + ``--no-verify`` on the command line. + + :return: + :class:`~git.objects.commit.Commit` object representing the new commit + """ if not skip_hooks: run_commit_hook("pre-commit", self) @@ -1099,47 +1200,52 @@ class IndexFile(LazyMixin, git_diff.Diffable, Serializable): fprogress: Callable = lambda *args: None, **kwargs: Any, ) -> Union[None, Iterator[PathLike], Sequence[PathLike]]: - """Checkout the given paths or all files from the version known to the index into - the working tree. + """Check out the given paths or all files from the version known to the index + into the working tree. - :note: Be sure you have written pending changes using the ``write`` method - in case you have altered the enties dictionary directly + :note: + Be sure you have written pending changes using the :meth:`write` method in + case you have altered the entries dictionary directly. :param paths: - If None, all paths in the index will be checked out. Otherwise an iterable - of relative or absolute paths or a single path pointing to files or directories - in the index is expected. + If ``None``, all paths in the index will be checked out. 
+ Otherwise an iterable of relative or absolute paths or a single path + pointing to files or directories in the index is expected. :param force: - If True, existing files will be overwritten even if they contain local modifications. - If False, these will trigger a CheckoutError. + If ``True``, existing files will be overwritten even if they contain local + modifications. + If ``False``, these will trigger a :exc:`~git.exc.CheckoutError`. :param fprogress: - see :func:`IndexFile.add` for signature and explanation. - The provided progress information will contain None as path and item if no - explicit paths are given. Otherwise progress information will be send - prior and after a file has been checked out + See :meth:`IndexFile.add` for signature and explanation. + + The provided progress information will contain ``None`` as path and item if + no explicit paths are given. Otherwise progress information will be sent + prior and after a file has been checked out. :param kwargs: - Additional arguments to be passed to git-checkout-index + Additional arguments to be passed to :manpage:`git-checkout-index(1)`. :return: - iterable yielding paths to files which have been checked out and are - guaranteed to match the version stored in the index - - :raise exc.CheckoutError: - If at least one file failed to be checked out. This is a summary, - hence it will checkout as many files as it can anyway. - If one of files or directories do not exist in the index - ( as opposed to the original git command who ignores them ). - Raise GitCommandError if error lines could not be parsed - this truly is - an exceptional state - - .. note:: The checkout is limited to checking out the files in the - index. Files which are not in the index anymore and exist in - the working tree will not be deleted. This behaviour is fundamentally - different to *head.checkout*, i.e. if you want git-checkout like behaviour, - use head.checkout instead of index.checkout. 
+ Iterable yielding paths to files which have been checked out and are + guaranteed to match the version stored in the index. + + :raise git.exc.CheckoutError: + * If at least one file failed to be checked out. This is a summary, hence it + will checkout as many files as it can anyway. + * If one of files or directories do not exist in the index (as opposed to + the original git command, which ignores them). + + :raise git.exc.GitCommandError: + If error lines could not be parsed - this truly is an exceptional state. + + :note: + The checkout is limited to checking out the files in the index. Files which + are not in the index anymore and exist in the working tree will not be + deleted. This behaviour is fundamentally different to ``head.checkout``, + i.e. if you want :manpage:`git-checkout(1)`-like behaviour, use + ``head.checkout`` instead of ``index.checkout``. """ args = ["--index"] if force: @@ -1152,9 +1258,9 @@ class IndexFile(LazyMixin, git_diff.Diffable, Serializable): def handle_stderr(proc: "Popen[bytes]", iter_checked_out_files: Iterable[PathLike]) -> None: stderr_IO = proc.stderr if not stderr_IO: - return None # return early if stderr empty - else: - stderr_bytes = stderr_IO.read() + return # Return early if stderr empty. + + stderr_bytes = stderr_IO.read() # line contents: stderr = stderr_bytes.decode(defenc) # git-checkout-index: this already exists @@ -1218,10 +1324,10 @@ class IndexFile(LazyMixin, git_diff.Diffable, Serializable): if isinstance(paths, str): paths = [paths] - # make sure we have our entries loaded before we start checkout_index - # which will hold a lock on it. We try to get the lock as well during - # our entries initialization - self.entries + # Make sure we have our entries loaded before we start checkout_index, which + # will hold a lock on it. We try to get the lock as well during our entries + # initialization. 
+ self.entries # noqa: B018 args.append("--stdin") kwargs["as_process"] = True @@ -1233,7 +1339,7 @@ class IndexFile(LazyMixin, git_diff.Diffable, Serializable): for path in paths: co_path = to_native_path_linux(self._to_relative_path(path)) - # if the item is not in the index, it could be a directory + # If the item is not in the index, it could be a directory. path_is_directory = False try: @@ -1261,7 +1367,7 @@ class IndexFile(LazyMixin, git_diff.Diffable, Serializable): self._flush_stdin_and_wait(proc, ignore_stdout=True) except GitCommandError: # Without parsing stdout we don't know what failed. - raise CheckoutError( + raise CheckoutError( # noqa: B904 "Some files could not be checked out from the index, probably because they didn't exist.", failed_files, [], @@ -1281,40 +1387,51 @@ class IndexFile(LazyMixin, git_diff.Diffable, Serializable): head: bool = False, **kwargs: Any, ) -> "IndexFile": - """Reset the index to reflect the tree at the given commit. This will not - adjust our HEAD reference as opposed to HEAD.reset by default. + """Reset the index to reflect the tree at the given commit. This will not adjust + our HEAD reference by default, as opposed to + :meth:`HEAD.reset <git.refs.head.HEAD.reset>`. :param commit: - Revision, Reference or Commit specifying the commit we should represent. - If you want to specify a tree only, use IndexFile.from_tree and overwrite - the default index. + Revision, :class:`~git.refs.reference.Reference` or + :class:`~git.objects.commit.Commit` specifying the commit we should + represent. + + If you want to specify a tree only, use :meth:`IndexFile.from_tree` and + overwrite the default index. :param working_tree: - If True, the files in the working tree will reflect the changed index. - If False, the working tree will not be touched + If ``True``, the files in the working tree will reflect the changed index. + If ``False``, the working tree will not be touched. 
Please note that changes to the working copy will be discarded without - warning ! + warning! :param head: - If True, the head will be set to the given commit. This is False by default, - but if True, this method behaves like HEAD.reset. + If ``True``, the head will be set to the given commit. This is ``False`` by + default, but if ``True``, this method behaves like + :meth:`HEAD.reset <git.refs.head.HEAD.reset>`. - :param paths: if given as an iterable of absolute or repository-relative paths, - only these will be reset to their state at the given commit'ish. + :param paths: + If given as an iterable of absolute or repository-relative paths, only these + will be reset to their state at the given commit-ish. The paths need to exist at the commit, otherwise an exception will be raised. :param kwargs: - Additional keyword arguments passed to git-reset + Additional keyword arguments passed to :manpage:`git-reset(1)`. - .. note:: IndexFile.reset, as opposed to HEAD.reset, will not delete anyfiles - in order to maintain a consistent working tree. Instead, it will just - checkout the files according to their state in the index. - If you want git-reset like behaviour, use *HEAD.reset* instead. + :note: + :meth:`IndexFile.reset`, as opposed to + :meth:`HEAD.reset <git.refs.head.HEAD.reset>`, will not delete any files in + order to maintain a consistent working tree. Instead, it will just check out + the files according to their state in the index. + If you want :manpage:`git-reset(1)`-like behaviour, use + :meth:`HEAD.reset <git.refs.head.HEAD.reset>` instead. - :return: self""" - # what we actually want to do is to merge the tree into our existing - # index, which is what git-read-tree does + :return: + self + """ + # What we actually want to do is to merge the tree into our existing index, + # which is what git-read-tree does. 
new_inst = type(self).from_tree(self.repo, commit) if not paths: self.entries = new_inst.entries @@ -1326,7 +1443,7 @@ class IndexFile(LazyMixin, git_diff.Diffable, Serializable): key = entry_key(path, 0) self.entries[key] = nie[key] except KeyError: - # if key is not in theirs, it musn't be in ours + # If key is not in theirs, it mustn't be in ours. try: del self.entries[key] except KeyError: @@ -1347,48 +1464,55 @@ class IndexFile(LazyMixin, git_diff.Diffable, Serializable): return self - # @ default_index, breaks typing for some reason, copied into function + # FIXME: This is documented to accept the same parameters as Diffable.diff, but this + # does not handle NULL_TREE for `other`. (The suppressed mypy error is about this.) def diff( - self, # type: ignore[override] - other: Union[Type["git_diff.Diffable.Index"], "Tree", "Commit", str, None] = git_diff.Diffable.Index, + self, + other: Union[ # type: ignore[override] + Literal[git_diff.DiffConstants.INDEX], + "Tree", + "Commit", + str, + None, + ] = git_diff.INDEX, paths: Union[PathLike, List[PathLike], Tuple[PathLike, ...], None] = None, create_patch: bool = False, **kwargs: Any, - ) -> git_diff.DiffIndex: - """Diff this index against the working copy or a Tree or Commit object + ) -> git_diff.DiffIndex[git_diff.Diff]: + """Diff this index against the working copy or a :class:`~git.objects.tree.Tree` + or :class:`~git.objects.commit.Commit` object. - For a documentation of the parameters and return values, see, - Diffable.diff + For documentation of the parameters and return values, see + :meth:`Diffable.diff <git.diff.Diffable.diff>`. :note: - Will only work with indices that represent the default git index as - they have not been initialized with a stream. + Will only work with indices that represent the default git index as they + have not been initialized with a stream. """ - - # only run if we are the default repository index + # Only run if we are the default repository index. 
if self._file_path != self._index_path(): raise AssertionError("Cannot call %r on indices that do not represent the default git index" % self.diff()) - # index against index is always empty - if other is self.Index: + # Index against index is always empty. + if other is self.INDEX: return git_diff.DiffIndex() - # index against anything but None is a reverse diff with the respective - # item. Handle existing -R flags properly. Transform strings to the object - # so that we can call diff on it + # Index against anything but None is a reverse diff with the respective item. + # Handle existing -R flags properly. + # Transform strings to the object so that we can call diff on it. if isinstance(other, str): other = self.repo.rev_parse(other) # END object conversion - if isinstance(other, Object): # for Tree or Commit - # invert the existing R flag + if isinstance(other, Object): # For Tree or Commit. + # Invert the existing R flag. cur_val = kwargs.get("R", False) kwargs["R"] = not cur_val - return other.diff(self.Index, paths, create_patch, **kwargs) + return other.diff(self.INDEX, paths, create_patch, **kwargs) # END diff against other item handling - # if other is not None here, something is wrong + # If other is not None here, something is wrong. if other is not None: - raise ValueError("other must be None, Diffable.Index, a Tree or Commit, was %r" % other) + raise ValueError("other must be None, Diffable.INDEX, a Tree or Commit, was %r" % other) - # diff against working copy - can be handled by superclass natively - return super(IndexFile, self).diff(other, paths, create_patch, **kwargs) + # Diff against working copy - can be handled by superclass natively. 
+ return super().diff(other, paths, create_patch, **kwargs) diff --git a/git/index/fun.py b/git/index/fun.py index b50f1f4..59cce6a 100644 --- a/git/index/fun.py +++ b/git/index/fun.py @@ -1,41 +1,40 @@ -# Contains standalone functions to accompany the index implementation and make it -# more versatile -# NOTE: Autodoc hates it if this is a docstring +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ + +"""Standalone functions to accompany the index implementation and make it more +versatile.""" + +__all__ = [ + "write_cache", + "read_cache", + "write_tree_from_cache", + "entry_key", + "stat_mode_to_index_mode", + "S_IFGITLINK", + "run_commit_hook", + "hook_path", +] from io import BytesIO -from pathlib import Path import os -from stat import ( - S_IFDIR, - S_IFLNK, - S_ISLNK, - S_ISDIR, - S_IFMT, - S_IFREG, - S_IXUSR, -) +import os.path as osp +from pathlib import Path +from stat import S_IFDIR, S_IFLNK, S_IFMT, S_IFREG, S_ISDIR, S_ISLNK, S_IXUSR import subprocess +import sys -from git.cmd import PROC_CREATIONFLAGS, handle_process_output -from git.compat import ( - defenc, - force_text, - force_bytes, - is_posix, - is_win, - safe_decode, -) -from git.exc import UnmergedEntriesError, HookExecutionError +from gitdb.base import IStream +from gitdb.typ import str_tree_type + +from git.cmd import handle_process_output, safer_popen +from git.compat import defenc, force_bytes, force_text, safe_decode +from git.exc import HookExecutionError, UnmergedEntriesError from git.objects.fun import ( - tree_to_stream, traverse_tree_recursive, traverse_trees_recursive, + tree_to_stream, ) from git.util import IndexFileSHA1Writer, finalize_process -from gitdb.base import IStream -from gitdb.typ import str_tree_type - -import os.path as osp from .typ import BaseIndexEntry, IndexEntry, CE_NAMEMASK, CE_STAGESHIFT from .util import pack, unpack @@ -47,29 +46,18 @@ from typing import Dict, IO, List, 
Sequence, TYPE_CHECKING, Tuple, Type, Union, from git.types import PathLike if TYPE_CHECKING: - from .base import IndexFile from git.db import GitCmdObjectDB from git.objects.tree import TreeCacheTup - # from git.objects.fun import EntryTupOrNone + from .base import IndexFile # ------------------------------------------------------------------------------------ +S_IFGITLINK = S_IFLNK | S_IFDIR +"""Flags for a submodule.""" -S_IFGITLINK = S_IFLNK | S_IFDIR # a submodule CE_NAMEMASK_INV = ~CE_NAMEMASK -__all__ = ( - "write_cache", - "read_cache", - "write_tree_from_cache", - "entry_key", - "stat_mode_to_index_mode", - "S_IFGITLINK", - "run_commit_hook", - "hook_path", -) - def hook_path(name: str, git_dir: PathLike) -> str: """:return: path to the given named hook in the given git repository directory""" @@ -81,35 +69,40 @@ def _has_file_extension(path: str) -> str: def run_commit_hook(name: str, index: "IndexFile", *args: str) -> None: - """Run the commit hook of the given name. Silently ignores hooks that do not exist. + """Run the commit hook of the given name. Silently ignore hooks that do not exist. + + :param name: + Name of hook, like ``pre-commit``. - :param name: name of hook, like 'pre-commit' - :param index: IndexFile instance - :param args: arguments passed to hook file - :raises HookExecutionError:""" + :param index: + :class:`~git.index.base.IndexFile` instance. + + :param args: + Arguments passed to hook file. + + :raise git.exc.HookExecutionError: + """ hp = hook_path(name, index.repo.git_dir) if not os.access(hp, os.X_OK): - return None + return env = os.environ.copy() env["GIT_INDEX_FILE"] = safe_decode(str(index.path)) env["GIT_EDITOR"] = ":" cmd = [hp] try: - if is_win and not _has_file_extension(hp): + if sys.platform == "win32" and not _has_file_extension(hp): # Windows only uses extensions to determine how to open files # (doesn't understand shebangs). Try using bash to run the hook. 
relative_hp = Path(hp).relative_to(index.repo.working_dir).as_posix() cmd = ["bash.exe", relative_hp] - process = subprocess.Popen( + process = safer_popen( cmd + list(args), env=env, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=index.repo.working_dir, - close_fds=is_posix, - creationflags=PROC_CREATIONFLAGS, ) except Exception as ex: raise HookExecutionError(hp, ex) from ex @@ -123,12 +116,12 @@ def run_commit_hook(name: str, index: "IndexFile", *args: str) -> None: stdout = force_text(stdout, defenc) stderr = force_text(stderr, defenc) raise HookExecutionError(hp, process.returncode, stderr, stdout) - # end handle return code + # END handle return code def stat_mode_to_index_mode(mode: int) -> int: - """Convert the given mode from a stat call to the corresponding index mode - and return it""" + """Convert the given mode from a stat call to the corresponding index mode and + return it.""" if S_ISLNK(mode): # symlinks return S_IFLNK if S_ISDIR(mode) or S_IFMT(mode) == S_IFGITLINK: # submodules @@ -142,38 +135,43 @@ def write_cache( extension_data: Union[None, bytes] = None, ShaStreamCls: Type[IndexFileSHA1Writer] = IndexFileSHA1Writer, ) -> None: - """Write the cache represented by entries to a stream + """Write the cache represented by entries to a stream. - :param entries: **sorted** list of entries - :param stream: stream to wrap into the AdapterStreamCls - it is used for - final output. + :param entries: + **Sorted** list of entries. - :param ShaStreamCls: Type to use when writing to the stream. It produces a sha - while writing to it, before the data is passed on to the wrapped stream + :param stream: + Stream to wrap into the AdapterStreamCls - it is used for final output. - :param extension_data: any kind of data to write as a trailer, it must begin - a 4 byte identifier, followed by its size ( 4 bytes )""" - # wrap the stream into a compatible writer + :param ShaStreamCls: + Type to use when writing to the stream. 
It produces a sha while writing to it, + before the data is passed on to the wrapped stream. + + :param extension_data: + Any kind of data to write as a trailer, it must begin a 4 byte identifier, + followed by its size (4 bytes). + """ + # Wrap the stream into a compatible writer. stream_sha = ShaStreamCls(stream) tell = stream_sha.tell write = stream_sha.write - # header + # Header version = 2 write(b"DIRC") write(pack(">LL", version, len(entries))) - # body + # Body for entry in entries: beginoffset = tell() write(entry.ctime_bytes) # ctime write(entry.mtime_bytes) # mtime path_str = str(entry.path) path: bytes = force_bytes(path_str, encoding=defenc) - plen = len(path) & CE_NAMEMASK # path length + plen = len(path) & CE_NAMEMASK # Path length assert plen == len(path), "Path %s too long to fit into index" % entry.path - flags = plen | (entry.flags & CE_NAMEMASK_INV) # clear possible previous values + flags = plen | (entry.flags & CE_NAMEMASK_INV) # Clear possible previous values. write( pack( ">LLLLLL20sH", @@ -192,30 +190,36 @@ def write_cache( write(b"\0" * ((beginoffset + real_size) - tell())) # END for each entry - # write previously cached extensions data + # Write previously cached extensions data. if extension_data is not None: stream_sha.write(extension_data) - # write the sha over the content + # Write the sha over the content. stream_sha.write_sha() def read_header(stream: IO[bytes]) -> Tuple[int, int]: - """Return tuple(version_long, num_entries) from the given stream""" + """Return tuple(version_long, num_entries) from the given stream.""" type_id = stream.read(4) if type_id != b"DIRC": raise AssertionError("Invalid index file header: %r" % type_id) unpacked = cast(Tuple[int, int], unpack(">LL", stream.read(4 * 2))) version, num_entries = unpacked - # TODO: handle version 3: extended data, see read-cache.c + # TODO: Handle version 3: extended data, see read-cache.c. 
assert version in (1, 2) return version, num_entries def entry_key(*entry: Union[BaseIndexEntry, PathLike, int]) -> Tuple[PathLike, int]: - """:return: Key suitable to be used for the index.entries dictionary - :param entry: One instance of type BaseIndexEntry or the path and the stage""" + """ + :return: + Key suitable to be used for the + :attr:`index.entries <git.index.base.IndexFile.entries>` dictionary. + + :param entry: + One instance of type BaseIndexEntry or the path and the stage. + """ # def is_entry_key_tup(entry_key: Tuple) -> TypeGuard[Tuple[PathLike, int]]: # return isinstance(entry_key, tuple) and len(entry_key) == 2 @@ -234,14 +238,17 @@ def entry_key(*entry: Union[BaseIndexEntry, PathLike, int]) -> Tuple[PathLike, i def read_cache( stream: IO[bytes], ) -> Tuple[int, Dict[Tuple[PathLike, int], "IndexEntry"], bytes, bytes]: - """Read a cache file from the given stream + """Read a cache file from the given stream. - :return: tuple(version, entries_dict, extension_data, content_sha) + :return: + tuple(version, entries_dict, extension_data, content_sha) - * version is the integer version number - * entries dict is a dictionary which maps IndexEntry instances to a path at a stage - * extension_data is '' or 4 bytes of type + 4 bytes of size + size bytes - * content_sha is a 20 byte sha on all cache file contents""" + * *version* is the integer version number. + * *entries_dict* is a dictionary which maps IndexEntry instances to a path at a + stage. + * *extension_data* is ``""`` or 4 bytes of type + 4 bytes of size + size bytes. + * *content_sha* is a 20 byte sha on all cache file contents. 
+ """ version, num_entries = read_header(stream) count = 0 entries: Dict[Tuple[PathLike, int], "IndexEntry"] = {} @@ -259,25 +266,25 @@ def read_cache( real_size = (tell() - beginoffset + 8) & ~7 read((beginoffset + real_size) - tell()) entry = IndexEntry((mode, sha, flags, path, ctime, mtime, dev, ino, uid, gid, size)) - # entry_key would be the method to use, but we safe the effort + # entry_key would be the method to use, but we save the effort. entries[(path, entry.stage)] = entry count += 1 # END for each entry - # the footer contains extension data and a sha on the content so far - # Keep the extension footer,and verify we have a sha in the end + # The footer contains extension data and a sha on the content so far. + # Keep the extension footer,and verify we have a sha in the end. # Extension data format is: - # 4 bytes ID - # 4 bytes length of chunk - # repeated 0 - N times + # 4 bytes ID + # 4 bytes length of chunk + # Repeated 0 - N times extension_data = stream.read(~0) - assert ( - len(extension_data) > 19 - ), "Index Footer was not at least a sha on content as it was only %i bytes in size" % len(extension_data) + assert len(extension_data) > 19, ( + "Index Footer was not at least a sha on content as it was only %i bytes in size" % len(extension_data) + ) content_sha = extension_data[-20:] - # truncate the sha in the end as we will dynamically create it anyway + # Truncate the sha in the end as we will dynamically create it anyway. 
extension_data = extension_data[:-20] return (version, entries, extension_data, content_sha) @@ -286,15 +293,26 @@ def read_cache( def write_tree_from_cache( entries: List[IndexEntry], odb: "GitCmdObjectDB", sl: slice, si: int = 0 ) -> Tuple[bytes, List["TreeCacheTup"]]: - """Create a tree from the given sorted list of entries and put the respective - trees into the given object database - - :param entries: **sorted** list of IndexEntries - :param odb: object database to store the trees in - :param si: start index at which we should start creating subtrees - :param sl: slice indicating the range we should process on the entries list - :return: tuple(binsha, list(tree_entry, ...)) a tuple of a sha and a list of - tree entries being a tuple of hexsha, mode, name""" + R"""Create a tree from the given sorted list of entries and put the respective + trees into the given object database. + + :param entries: + **Sorted** list of :class:`~git.index.typ.IndexEntry`\s. + + :param odb: + Object database to store the trees in. + + :param si: + Start index at which we should start creating subtrees. + + :param sl: + Slice indicating the range we should process on the entries list. + + :return: + tuple(binsha, list(tree_entry, ...)) + + A tuple of a sha and a list of tree entries being a tuple of hexsha, mode, name. + """ tree_items: List["TreeCacheTup"] = [] ci = sl.start @@ -307,10 +325,10 @@ def write_tree_from_cache( ci += 1 rbound = entry.path.find("/", si) if rbound == -1: - # its not a tree + # It's not a tree. tree_items.append((entry.binsha, entry.mode, entry.path[si:])) else: - # find common base range + # Find common base range. base = entry.path[si:rbound] xi = ci while xi < end: @@ -322,19 +340,19 @@ def write_tree_from_cache( xi += 1 # END find common base - # enter recursion - # ci - 1 as we want to count our current item as well + # Enter recursion. + # ci - 1 as we want to count our current item as well. 
sha, _tree_entry_list = write_tree_from_cache(entries, odb, slice(ci - 1, xi), rbound + 1) tree_items.append((sha, S_IFDIR, base)) - # skip ahead + # Skip ahead. ci = xi # END handle bounds # END for each entry - # finally create the tree + # Finally create the tree. sio = BytesIO() - tree_to_stream(tree_items, sio.write) # writes to stream as bytes, but doesn't change tree_items + tree_to_stream(tree_items, sio.write) # Writes to stream as bytes, but doesn't change tree_items. sio.seek(0) istream = odb.store(IStream(str_tree_type, len(sio.getvalue()), sio)) @@ -346,17 +364,21 @@ def _tree_entry_to_baseindexentry(tree_entry: "TreeCacheTup", stage: int) -> Bas def aggressive_tree_merge(odb: "GitCmdObjectDB", tree_shas: Sequence[bytes]) -> List[BaseIndexEntry]: + R""" + :return: + List of :class:`~git.index.typ.BaseIndexEntry`\s representing the aggressive + merge of the given trees. All valid entries are on stage 0, whereas the + conflicting ones are left on stage 1, 2 or 3, whereas stage 1 corresponds to the + common ancestor tree, 2 to our tree and 3 to 'their' tree. + + :param tree_shas: + 1, 2 or 3 trees as identified by their binary 20 byte shas. If 1 or two, the + entries will effectively correspond to the last given tree. If 3 are given, a 3 + way merge is performed. """ - :return: list of BaseIndexEntries representing the aggressive merge of the given - trees. All valid entries are on stage 0, whereas the conflicting ones are left - on stage 1, 2 or 3, whereas stage 1 corresponds to the common ancestor tree, - 2 to our tree and 3 to 'their' tree. 
- :param tree_shas: 1, 2 or 3 trees as identified by their binary 20 byte shas - If 1 or two, the entries will effectively correspond to the last given tree - If 3 are given, a 3 way merge is performed""" out: List[BaseIndexEntry] = [] - # one and two way is the same for us, as we don't have to handle an existing + # One and two way is the same for us, as we don't have to handle an existing # index, instrea if len(tree_shas) in (1, 2): for entry in traverse_tree_recursive(odb, tree_shas[-1], ""): @@ -368,72 +390,72 @@ def aggressive_tree_merge(odb: "GitCmdObjectDB", tree_shas: Sequence[bytes]) -> if len(tree_shas) > 3: raise ValueError("Cannot handle %i trees at once" % len(tree_shas)) - # three trees + # Three trees. for base, ours, theirs in traverse_trees_recursive(odb, tree_shas, ""): if base is not None: - # base version exists + # Base version exists. if ours is not None: - # ours exists + # Ours exists. if theirs is not None: - # it exists in all branches, if it was changed in both - # its a conflict, otherwise we take the changed version - # This should be the most common branch, so it comes first + # It exists in all branches. Ff it was changed in both + # its a conflict. Otherwise, we take the changed version. + # This should be the most common branch, so it comes first. if (base[0] != ours[0] and base[0] != theirs[0] and ours[0] != theirs[0]) or ( base[1] != ours[1] and base[1] != theirs[1] and ours[1] != theirs[1] ): - # changed by both + # Changed by both. out.append(_tree_entry_to_baseindexentry(base, 1)) out.append(_tree_entry_to_baseindexentry(ours, 2)) out.append(_tree_entry_to_baseindexentry(theirs, 3)) elif base[0] != ours[0] or base[1] != ours[1]: - # only we changed it + # Only we changed it. out.append(_tree_entry_to_baseindexentry(ours, 0)) else: - # either nobody changed it, or they did. In either - # case, use theirs + # Either nobody changed it, or they did. In either + # case, use theirs. 
out.append(_tree_entry_to_baseindexentry(theirs, 0)) # END handle modification else: if ours[0] != base[0] or ours[1] != base[1]: - # they deleted it, we changed it, conflict + # They deleted it, we changed it, conflict. out.append(_tree_entry_to_baseindexentry(base, 1)) out.append(_tree_entry_to_baseindexentry(ours, 2)) # else: - # we didn't change it, ignore + # # We didn't change it, ignore. # pass # END handle our change # END handle theirs else: if theirs is None: - # deleted in both, its fine - its out + # Deleted in both, its fine - it's out. pass else: if theirs[0] != base[0] or theirs[1] != base[1]: - # deleted in ours, changed theirs, conflict + # Deleted in ours, changed theirs, conflict. out.append(_tree_entry_to_baseindexentry(base, 1)) out.append(_tree_entry_to_baseindexentry(theirs, 3)) # END theirs changed # else: - # theirs didn't change + # # Theirs didn't change. # pass # END handle theirs # END handle ours else: - # all three can't be None + # All three can't be None. if ours is None: - # added in their branch + # Added in their branch. assert theirs is not None out.append(_tree_entry_to_baseindexentry(theirs, 0)) elif theirs is None: - # added in our branch + # Added in our branch. out.append(_tree_entry_to_baseindexentry(ours, 0)) else: - # both have it, except for the base, see whether it changed + # Both have it, except for the base, see whether it changed. if ours[0] != theirs[0] or ours[1] != theirs[1]: out.append(_tree_entry_to_baseindexentry(ours, 2)) out.append(_tree_entry_to_baseindexentry(theirs, 3)) else: - # it was added the same in both + # It was added the same in both. 
out.append(_tree_entry_to_baseindexentry(ours, 0)) # END handle two items # END handle heads diff --git a/git/index/typ.py b/git/index/typ.py index b2c6c37..9742525 100644 --- a/git/index/typ.py +++ b/git/index/typ.py @@ -1,15 +1,20 @@ -"""Module with additional types used by the index""" +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ + +"""Additional types used by the index.""" + +__all__ = ["BlobFilter", "BaseIndexEntry", "IndexEntry", "StageType"] from binascii import b2a_hex from pathlib import Path -from .util import pack, unpack from git.objects import Blob +from .util import pack, unpack # typing ---------------------------------------------------------------------- -from typing import NamedTuple, Sequence, TYPE_CHECKING, Tuple, Union, cast, List +from typing import NamedTuple, Sequence, TYPE_CHECKING, Tuple, Union, cast from git.types import PathLike @@ -20,8 +25,6 @@ StageType = int # --------------------------------------------------------------------------------- -__all__ = ("BlobFilter", "BaseIndexEntry", "IndexEntry", "StageType") - # { Invariants CE_NAMEMASK = 0x0FFF CE_STAGEMASK = 0x3000 @@ -32,22 +35,21 @@ CE_STAGESHIFT = 12 # } END invariants -class BlobFilter(object): - - """ - Predicate to be used by iter_blobs allowing to filter only return blobs which - match the given list of directories or files. +class BlobFilter: + """Predicate to be used by + :meth:`IndexFile.iter_blobs <git.index.base.IndexFile.iter_blobs>` allowing to + filter only return blobs which match the given list of directories or files. The given paths are given relative to the repository. 
""" - __slots__ = "paths" + __slots__ = ("paths",) def __init__(self, paths: Sequence[PathLike]) -> None: """ :param paths: - tuple or list of paths which are either pointing to directories or - to files relative to the current repository + Tuple or list of paths which are either pointing to directories or to files + relative to the current repository. """ self.paths = paths @@ -56,9 +58,10 @@ class BlobFilter(object): blob_path: Path = blob_pathlike if isinstance(blob_pathlike, Path) else Path(blob_pathlike) for pathlike in self.paths: path: Path = pathlike if isinstance(pathlike, Path) else Path(pathlike) - # TODO: Change to use `PosixPath.is_relative_to` once Python 3.8 is no longer supported. - filter_parts: List[str] = path.parts - blob_parts: List[str] = blob_path.parts + # TODO: Change to use `PosixPath.is_relative_to` once Python 3.8 is no + # longer supported. + filter_parts = path.parts + blob_parts = blob_path.parts if len(filter_parts) > len(blob_parts): continue if all(i == j for i, j in zip(filter_parts, blob_parts)): @@ -67,8 +70,11 @@ class BlobFilter(object): class BaseIndexEntryHelper(NamedTuple): - """Typed namedtuple to provide named attribute access for BaseIndexEntry. - Needed to allow overriding __new__ in child class to preserve backwards compat.""" + """Typed named tuple to provide named attribute access for :class:`BaseIndexEntry`. + + This is needed to allow overriding ``__new__`` in child class to preserve backwards + compatibility. + """ mode: int binsha: bytes @@ -84,13 +90,12 @@ class BaseIndexEntryHelper(NamedTuple): class BaseIndexEntry(BaseIndexEntryHelper): - - """Small Brother of an index entry which can be created to describe changes + R"""Small brother of an index entry which can be created to describe changes done to the index in which case plenty of additional information is not required. 
- As the first 4 data members match exactly to the IndexEntry type, methods - expecting a BaseIndexEntry can also handle full IndexEntries even if they - use numeric indices for performance reasons. + As the first 4 data members match exactly to the :class:`IndexEntry` type, methods + expecting a :class:`BaseIndexEntry` can also handle full :class:`IndexEntry`\s even + if they use numeric indices for performance reasons. """ def __new__( @@ -100,7 +105,8 @@ class BaseIndexEntry(BaseIndexEntryHelper): Tuple[int, bytes, int, PathLike, bytes, bytes, int, int, int, int, int], ], ) -> "BaseIndexEntry": - """Override __new__ to allow construction from a tuple for backwards compatibility""" + """Override ``__new__`` to allow construction from a tuple for backwards + compatibility.""" return super().__new__(cls, *inp_tuple) def __str__(self) -> str: @@ -121,9 +127,10 @@ class BaseIndexEntry(BaseIndexEntryHelper): * 0 = default stage * 1 = stage before a merge or common ancestor entry in case of a 3 way merge * 2 = stage of entries from the 'left' side of the merge - * 3 = stage of entries from the right side of the merge + * 3 = stage of entries from the 'right' side of the merge - :note: For more information, see http://www.kernel.org/pub/software/scm/git/docs/git-read-tree.html + :note: + For more information, see :manpage:`git-read-tree(1)`. """ return (self.flags & CE_STAGEMASK) >> CE_STAGESHIFT @@ -138,35 +145,39 @@ class BaseIndexEntry(BaseIndexEntryHelper): class IndexEntry(BaseIndexEntry): + """Allows convenient access to index entry data as defined in + :class:`BaseIndexEntry` without completely unpacking it. - """Allows convenient access to IndexEntry data without completely unpacking it. - - Attributes usully accessed often are cached in the tuple whereas others are + Attributes usually accessed often are cached in the tuple whereas others are unpacked on demand. 
- See the properties for a mapping between names and tuple indices.""" + See the properties for a mapping between names and tuple indices. + """ @property def ctime(self) -> Tuple[int, int]: """ :return: Tuple(int_time_seconds_since_epoch, int_nano_seconds) of the - file's creation time""" + file's creation time + """ return cast(Tuple[int, int], unpack(">LL", self.ctime_bytes)) @property def mtime(self) -> Tuple[int, int]: - """See ctime property, but returns modification time""" + """See :attr:`ctime` property, but returns modification time.""" return cast(Tuple[int, int], unpack(">LL", self.mtime_bytes)) @classmethod def from_base(cls, base: "BaseIndexEntry") -> "IndexEntry": """ :return: - Minimal entry as created from the given BaseIndexEntry instance. - Missing values will be set to null-like values + Minimal entry as created from the given :class:`BaseIndexEntry` instance. + Missing values will be set to null-like values. - :param base: Instance of type BaseIndexEntry""" + :param base: + Instance of type :class:`BaseIndexEntry`. 
+ """ time = pack(">LL", 0, 0) return IndexEntry((base.mode, base.binsha, base.flags, base.path, time, time, 0, 0, 0, 0, 0)) diff --git a/git/index/util.py b/git/index/util.py index 6cf838f..e59cb60 100644 --- a/git/index/util.py +++ b/git/index/util.py @@ -1,52 +1,48 @@ -"""Module containing index utilities""" +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ + +"""Index utilities.""" + +__all__ = ["TemporaryFileSwap", "post_clear_cache", "default_index", "git_working_dir"] + +import contextlib from functools import wraps import os +import os.path as osp import struct import tempfile from types import TracebackType -from git.compat import is_win - -import os.path as osp - - # typing ---------------------------------------------------------------------- from typing import Any, Callable, TYPE_CHECKING, Optional, Type -from git.types import PathLike, _T +from git.types import Literal, PathLike, _T if TYPE_CHECKING: from git.index import IndexFile # --------------------------------------------------------------------------------- - -__all__ = ("TemporaryFileSwap", "post_clear_cache", "default_index", "git_working_dir") - # { Aliases pack = struct.pack unpack = struct.unpack - - # } END aliases -class TemporaryFileSwap(object): - - """Utility class moving a file to a temporary location within the same directory - and moving it back on to where on object deletion.""" +class TemporaryFileSwap: + """Utility class moving a file to a temporary location within the same directory and + moving it back on to where on object deletion.""" __slots__ = ("file_path", "tmp_file_path") def __init__(self, file_path: PathLike) -> None: self.file_path = file_path - self.tmp_file_path = str(self.file_path) + tempfile.mktemp("", "", "") - # it may be that the source does not exist - try: - os.rename(self.file_path, self.tmp_file_path) - except OSError: - pass + dirname, basename = osp.split(file_path) + fd, 
self.tmp_file_path = tempfile.mkstemp(prefix=basename, dir=dirname) + os.close(fd) + with contextlib.suppress(OSError): # It may be that the source does not exist. + os.replace(self.file_path, self.tmp_file_path) def __enter__(self) -> "TemporaryFileSwap": return self @@ -56,12 +52,9 @@ class TemporaryFileSwap(object): exc_type: Optional[Type[BaseException]], exc_val: Optional[BaseException], exc_tb: Optional[TracebackType], - ) -> bool: + ) -> Literal[False]: if osp.isfile(self.tmp_file_path): - if is_win and osp.exists(self.file_path): - os.remove(self.file_path) - os.rename(self.tmp_file_path, self.file_path) - + os.replace(self.tmp_file_path, self.file_path) return False @@ -69,13 +62,10 @@ class TemporaryFileSwap(object): def post_clear_cache(func: Callable[..., _T]) -> Callable[..., _T]: - """Decorator for functions that alter the index using the git command. This would - invalidate our possibly existing entries dictionary which is why it must be - deleted to allow it to be lazily reread later. + """Decorator for functions that alter the index using the git command. - :note: - This decorator will not be required once all functions are implemented - natively which in fact is possible, but probably not feasible performance wise. + When a git command alters the index, this invalidates our possibly existing entries + dictionary, which is why it must be deleted to allow it to be lazily reread later. """ @wraps(func) @@ -90,9 +80,11 @@ def post_clear_cache(func: Callable[..., _T]) -> Callable[..., _T]: def default_index(func: Callable[..., _T]) -> Callable[..., _T]: - """Decorator assuring the wrapped method may only run if we are the default - repository index. This is as we rely on git commands that operate - on that index only.""" + """Decorator ensuring the wrapped method may only run if we are the default + repository index. + + This is as we rely on git commands that operate on that index only. 
+ """ @wraps(func) def check_default_index(self: "IndexFile", *args: Any, **kwargs: Any) -> _T: @@ -109,7 +101,7 @@ def default_index(func: Callable[..., _T]) -> Callable[..., _T]: def git_working_dir(func: Callable[..., _T]) -> Callable[..., _T]: """Decorator which changes the current working dir to the one of the git - repository in order to assure relative paths are handled correctly""" + repository in order to ensure relative paths are handled correctly.""" @wraps(func) def set_git_working_dir(self: "IndexFile", *args: Any, **kwargs: Any) -> _T: diff --git a/git/objects/__init__.py b/git/objects/__init__.py index 5910ac5..4447ca5 100644 --- a/git/objects/__init__.py +++ b/git/objects/__init__.py @@ -1,24 +1,25 @@ -""" -Import all submodules main classes into the package space -""" -# flake8: noqa -import inspect +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ -from .base import * -from .blob import * -from .commit import * -from .submodule import util as smutil -from .submodule.base import * -from .submodule.root import * -from .tag import * -from .tree import * +"""Import all submodules' main classes into the package space.""" -# Fix import dependency - add IndexObject to the util module, so that it can be -# imported by the submodule.base -smutil.IndexObject = IndexObject # type: ignore[attr-defined] -smutil.Object = Object # type: ignore[attr-defined] -del smutil +__all__ = [ + "IndexObject", + "Object", + "Blob", + "Commit", + "Submodule", + "UpdateProgress", + "RootModule", + "RootUpdateProgress", + "TagObject", + "Tree", + "TreeModifier", +] -# must come after submodule was made available - -__all__ = [name for name, obj in locals().items() if not (name.startswith("_") or inspect.ismodule(obj))] +from .base import IndexObject, Object +from .blob import Blob +from .commit import Commit +from .submodule import RootModule, RootUpdateProgress, Submodule, UpdateProgress +from 
.tag import TagObject +from .tree import Tree, TreeModifier diff --git a/git/objects/base.py b/git/objects/base.py index 1d07fd0..eeaebc0 100644 --- a/git/objects/base.py +++ b/git/objects/base.py @@ -1,45 +1,73 @@ -# base.py # Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors # -# This module is part of GitPython and is released under -# the BSD License: https://opensource.org/license/bsd-3-clause/ +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ -from git.exc import WorkTreeRepositoryUnsupported -from git.util import LazyMixin, join_path_native, stream_copy, bin_to_hex +__all__ = ["Object", "IndexObject"] -import gitdb.typ as dbtyp import os.path as osp -from .util import get_object_type_by_name +import gitdb.typ as dbtyp +from git.exc import WorkTreeRepositoryUnsupported +from git.util import LazyMixin, bin_to_hex, join_path_native, stream_copy + +from .util import get_object_type_by_name # typing ------------------------------------------------------------------ from typing import Any, TYPE_CHECKING, Union -from git.types import PathLike, Commit_ish, Lit_commit_ish +from git.types import AnyGitObject, GitObjectTypeString, PathLike if TYPE_CHECKING: - from git.repo import Repo from gitdb.base import OStream - from .tree import Tree + + from git.refs.reference import Reference + from git.repo import Repo + from .blob import Blob from .submodule.base import Submodule - from git.refs.reference import Reference + from .tree import Tree IndexObjUnion = Union["Tree", "Blob", "Submodule"] # -------------------------------------------------------------------------- -_assertion_msg_format = "Created object %r whose python type %r disagrees with the actual git object type %r" +class Object(LazyMixin): + """Base class for classes representing git object types. 
+ + The following four leaf classes represent specific kinds of git objects: -__all__ = ("Object", "IndexObject") + * :class:`Blob <git.objects.blob.Blob>` + * :class:`Tree <git.objects.tree.Tree>` + * :class:`Commit <git.objects.commit.Commit>` + * :class:`TagObject <git.objects.tag.TagObject>` + See :manpage:`gitglossary(7)` on: -class Object(LazyMixin): + * "object": https://git-scm.com/docs/gitglossary#def_object + * "object type": https://git-scm.com/docs/gitglossary#def_object_type + * "blob": https://git-scm.com/docs/gitglossary#def_blob_object + * "tree object": https://git-scm.com/docs/gitglossary#def_tree_object + * "commit object": https://git-scm.com/docs/gitglossary#def_commit_object + * "tag object": https://git-scm.com/docs/gitglossary#def_tag_object + + :note: + See the :class:`~git.types.AnyGitObject` union type of the four leaf subclasses + that represent actual git object types. - """Implements an Object which may be Blobs, Trees, Commits and Tags""" + :note: + :class:`~git.objects.submodule.base.Submodule` is defined under the hierarchy + rooted at this :class:`Object` class, even though submodules are not really a + type of git object. (This also applies to its + :class:`~git.objects.submodule.root.RootModule` subclass.) + + :note: + This :class:`Object` class should not be confused with :class:`object` (the root + of the class hierarchy in Python). + """ NULL_HEX_SHA = "0" * 40 NULL_BIN_SHA = b"\0" * 20 @@ -50,17 +78,37 @@ class Object(LazyMixin): dbtyp.str_commit_type, dbtyp.str_tag_type, ) + __slots__ = ("repo", "binsha", "size") - type: Union[Lit_commit_ish, None] = None - def __init__(self, repo: "Repo", binsha: bytes): + type: Union[GitObjectTypeString, None] = None + """String identifying (a concrete :class:`Object` subtype for) a git object type. + + The subtypes that this may name correspond to the kinds of git objects that exist, + i.e., the objects that may be present in a git repository. 
+ + :note: + Most subclasses represent specific types of git objects and override this class + attribute accordingly. This attribute is ``None`` in the :class:`Object` base + class, as well as the :class:`IndexObject` intermediate subclass, but never + ``None`` in concrete leaf subclasses representing specific git object types. + + :note: + See also :class:`~git.types.GitObjectTypeString`. + """ + + def __init__(self, repo: "Repo", binsha: bytes) -> None: """Initialize an object by identifying it by its binary sha. - All keyword arguments will be set on demand if None. - :param repo: repository this object is located in + All keyword arguments will be set on demand if ``None``. - :param binsha: 20 byte SHA1""" - super(Object, self).__init__() + :param repo: + Repository this object is located in. + + :param binsha: + 20 byte SHA1 + """ + super().__init__() self.repo = repo self.binsha = binsha assert len(binsha) == 20, "Require 20 byte binary sha, got %r, len = %i" % ( @@ -69,26 +117,33 @@ class Object(LazyMixin): ) @classmethod - def new(cls, repo: "Repo", id: Union[str, "Reference"]) -> Commit_ish: + def new(cls, repo: "Repo", id: Union[str, "Reference"]) -> AnyGitObject: """ - :return: New Object instance of a type appropriate to the object type behind - id. The id of the newly created object will be a binsha even though - the input id may have been a Reference or Rev-Spec + :return: + New :class:`Object` instance of a type appropriate to the object type behind + `id`. The id of the newly created object will be a binsha even though the + input id may have been a `~git.refs.reference.Reference` or rev-spec. - :param id: reference, rev-spec, or hexsha + :param id: + :class:`~git.refs.reference.Reference`, rev-spec, or hexsha. 
- :note: This cannot be a __new__ method as it would always call __init__ - with the input id which is not necessarily a binsha.""" + :note: + This cannot be a ``__new__`` method as it would always call :meth:`__init__` + with the input id which is not necessarily a binsha. + """ return repo.rev_parse(str(id)) @classmethod - def new_from_sha(cls, repo: "Repo", sha1: bytes) -> Commit_ish: + def new_from_sha(cls, repo: "Repo", sha1: bytes) -> AnyGitObject: + """ + :return: + New object instance of a type appropriate to represent the given binary sha1 + + :param sha1: + 20 byte binary sha1. """ - :return: new object instance of a type appropriate to represent the given - binary sha1 - :param sha1: 20 byte binary sha1""" if sha1 == cls.NULL_BIN_SHA: - # the NULL binsha is always the root commit + # The NULL binsha is always the root commit. return get_object_type_by_name(b"commit")(repo, sha1) # END handle special case oinfo = repo.odb.info(sha1) @@ -97,22 +152,21 @@ class Object(LazyMixin): return inst def _set_cache_(self, attr: str) -> None: - """Retrieve object information""" + """Retrieve object information.""" if attr == "size": oinfo = self.repo.odb.info(self.binsha) - self.size = oinfo.size # type: int - # assert oinfo.type == self.type, _assertion_msg_format % (self.binsha, oinfo.type, self.type) + self.size = oinfo.size # type: int else: - super(Object, self)._set_cache_(attr) + super()._set_cache_(attr) def __eq__(self, other: Any) -> bool: - """:return: True if the objects have the same SHA1""" + """:return: ``True`` if the objects have the same SHA1""" if not hasattr(other, "binsha"): return False return self.binsha == other.binsha def __ne__(self, other: Any) -> bool: - """:return: True if the objects do not have the same SHA1""" + """:return: ``True`` if the objects do not have the same SHA1""" if not hasattr(other, "binsha"): return True return self.binsha != other.binsha @@ -122,43 +176,56 @@ class Object(LazyMixin): return hash(self.binsha) def 
__str__(self) -> str: - """:return: string of our SHA1 as understood by all git commands""" + """:return: String of our SHA1 as understood by all git commands""" return self.hexsha def __repr__(self) -> str: - """:return: string with pythonic representation of our object""" + """:return: String with pythonic representation of our object""" return '<git.%s "%s">' % (self.__class__.__name__, self.hexsha) @property def hexsha(self) -> str: """:return: 40 byte hex version of our 20 byte binary sha""" - # b2a_hex produces bytes + # b2a_hex produces bytes. return bin_to_hex(self.binsha).decode("ascii") @property def data_stream(self) -> "OStream": - """:return: File Object compatible stream to the uncompressed raw data of the object - :note: returned streams must be read in order""" + """ + :return: + File-object compatible stream to the uncompressed raw data of the object + + :note: + Returned streams must be read in order. + """ return self.repo.odb.stream(self.binsha) def stream_data(self, ostream: "OStream") -> "Object": - """Writes our data directly to the given output stream + """Write our data directly to the given output stream. + + :param ostream: + File-object compatible stream object. - :param ostream: File object compatible stream object. - :return: self""" + :return: + self + """ istream = self.repo.odb.stream(self.binsha) stream_copy(istream, ostream) return self class IndexObject(Object): + """Base for all objects that can be part of the index file. - """Base for all objects that can be part of the index file , namely Tree, Blob and - SubModule objects""" + The classes representing git object types that can be part of the index file are + :class:`~git.objects.tree.Tree and :class:`~git.objects.blob.Blob`. In addition, + :class:`~git.objects.submodule.base.Submodule`, which is not really a git object + type but can be part of an index file, is also a subclass. 
+ """ __slots__ = ("path", "mode") - # for compatibility with iterable lists + # For compatibility with iterable lists. _id_attribute_ = "path" def __init__( @@ -168,20 +235,27 @@ class IndexObject(Object): mode: Union[None, int] = None, path: Union[None, PathLike] = None, ) -> None: - """Initialize a newly instanced IndexObject + """Initialize a newly instanced :class:`IndexObject`. + + :param repo: + The :class:`~git.repo.base.Repo` we are located in. + + :param binsha: + 20 byte sha1. - :param repo: is the Repo we are located in - :param binsha: 20 byte sha1 :param mode: - is the stat compatible file mode as int, use the stat module - to evaluate the information + The stat-compatible file mode as :class:`int`. + Use the :mod:`stat` module to evaluate the information. + :param path: - is the path to the file in the file system, relative to the git repository root, i.e. - file.ext or folder/other.ext + The path to the file in the file system, relative to the git repository + root, like ``file.ext`` or ``folder/other.ext``. + :note: - Path may not be set of the index object has been created directly as it cannot - be retrieved without knowing the parent tree.""" - super(IndexObject, self).__init__(repo, binsha) + Path may not be set if the index object has been created directly, as it + cannot be retrieved without knowing the parent tree. + """ + super().__init__(repo, binsha) if mode is not None: self.mode = mode if path is not None: @@ -190,19 +264,20 @@ class IndexObject(Object): def __hash__(self) -> int: """ :return: - Hash of our path as index items are uniquely identifiable by path, not - by their data !""" + Hash of our path as index items are uniquely identifiable by path, not by + their data! + """ return hash(self.path) def _set_cache_(self, attr: str) -> None: if attr in IndexObject.__slots__: - # they cannot be retrieved lateron ( not without searching for them ) + # They cannot be retrieved later on (not without searching for them). 
raise AttributeError( "Attribute '%s' unset: path and mode attributes must have been set during %s object creation" % (attr, type(self).__name__) ) else: - super(IndexObject, self)._set_cache_(attr) + super()._set_cache_(attr) # END handle slot attribute @property @@ -212,13 +287,15 @@ class IndexObject(Object): @property def abspath(self) -> PathLike: - """ + R""" :return: - Absolute path to this index object in the file system ( as opposed to the - .path field which is a path relative to the git repository ). + Absolute path to this index object in the file system (as opposed to the + :attr:`path` field which is a path relative to the git repository). - The returned path will be native to the system and contains '\' on windows.""" + The returned path will be native to the system and contains ``\`` on + Windows. + """ if self.repo.working_tree_dir is not None: return join_path_native(self.repo.working_tree_dir, self.path) else: - raise WorkTreeRepositoryUnsupported("Working_tree_dir was None or empty") + raise WorkTreeRepositoryUnsupported("working_tree_dir was None or empty") diff --git a/git/objects/blob.py b/git/objects/blob.py index 96ce486..58de596 100644 --- a/git/objects/blob.py +++ b/git/objects/blob.py @@ -1,24 +1,32 @@ -# blob.py # Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors # -# This module is part of GitPython and is released under -# the BSD License: https://opensource.org/license/bsd-3-clause/ +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ + +__all__ = ["Blob"] + from mimetypes import guess_type -from . import base +import sys -from git.types import Literal +if sys.version_info >= (3, 8): + from typing import Literal +else: + from typing_extensions import Literal -__all__ = ("Blob",) +from . import base class Blob(base.IndexObject): + """A Blob encapsulates a git blob object. 
- """A Blob encapsulates a git blob object""" + See :manpage:`gitglossary(7)` on "blob": + https://git-scm.com/docs/gitglossary#def_blob_object + """ DEFAULT_MIME_TYPE = "text/plain" type: Literal["blob"] = "blob" - # valid blob modes + # Valid blob modes executable_mode = 0o100755 file_mode = 0o100644 link_mode = 0o120000 @@ -28,8 +36,12 @@ class Blob(base.IndexObject): @property def mime_type(self) -> str: """ - :return: String describing the mime type of this file (based on the filename) - :note: Defaults to 'text/plain' in case the actual file type is unknown.""" + :return: + String describing the mime type of this file (based on the filename) + + :note: + Defaults to ``text/plain`` in case the actual file type is unknown. + """ guesses = None if self.path: guesses = guess_type(str(self.path)) diff --git a/git/objects/commit.py b/git/objects/commit.py index 88c485d..0ceb466 100644 --- a/git/objects/commit.py +++ b/git/objects/commit.py @@ -1,72 +1,82 @@ -# commit.py # Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors # -# This module is part of GitPython and is released under -# the BSD License: https://opensource.org/license/bsd-3-clause/ +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ + +__all__ = ["Commit"] + +from collections import defaultdict import datetime +from io import BytesIO +import logging +import os import re from subprocess import Popen, PIPE +import sys +from time import altzone, daylight, localtime, time, timezone +import warnings + from gitdb import IStream -from git.util import hex_to_bin, Actor, Stats, finalize_process -from git.diff import Diffable + from git.cmd import Git +from git.diff import Diffable +from git.util import Actor, Stats, finalize_process, hex_to_bin -from .tree import Tree from . 
import base +from .tree import Tree from .util import ( Serializable, TraversableIterableObj, - parse_date, altz_to_utctz_str, - parse_actor_and_date, from_timestamp, + parse_actor_and_date, + parse_date, ) -from time import time, daylight, altzone, timezone, localtime -import os -from io import BytesIO -import logging -from collections import defaultdict - - # typing ------------------------------------------------------------------ from typing import ( Any, + Dict, IO, Iterator, List, Sequence, Tuple, - Union, TYPE_CHECKING, + Union, cast, - Dict, ) -from git.types import PathLike, Literal +if sys.version_info >= (3, 8): + from typing import Literal +else: + from typing_extensions import Literal + +from git.types import PathLike if TYPE_CHECKING: - from git.repo import Repo from git.refs import SymbolicReference + from git.repo import Repo # ------------------------------------------------------------------------ -log = logging.getLogger("git.objects.commit") -log.addHandler(logging.NullHandler()) - -__all__ = ("Commit",) +_logger = logging.getLogger(__name__) class Commit(base.Object, TraversableIterableObj, Diffable, Serializable): + """Wraps a git commit object. - """Wraps a git Commit object. + See :manpage:`gitglossary(7)` on "commit object": + https://git-scm.com/docs/gitglossary#def_commit_object - This class will act lazily on some of its attributes and will query the - value on demand only if it involves calling the git binary.""" + :note: + This class will act lazily on some of its attributes and will query the value on + demand only if it involves calling the git binary. + """ # ENVIRONMENT VARIABLES - # read when creating new commits + # Read when creating new commits. 
env_author_date = "GIT_AUTHOR_DATE" env_committer_date = "GIT_COMMITTER_DATE" @@ -76,8 +86,8 @@ class Commit(base.Object, TraversableIterableObj, Diffable, Serializable): # INVARIANTS default_encoding = "UTF-8" - # object configuration type: Literal["commit"] = "commit" + __slots__ = ( "tree", "author", @@ -91,8 +101,11 @@ class Commit(base.Object, TraversableIterableObj, Diffable, Serializable): "encoding", "gpgsig", ) + _id_attribute_ = "hexsha" + parents: Sequence["Commit"] + def __init__( self, repo: "Repo", @@ -109,41 +122,54 @@ class Commit(base.Object, TraversableIterableObj, Diffable, Serializable): encoding: Union[str, None] = None, gpgsig: Union[str, None] = None, ) -> None: - """Instantiate a new Commit. All keyword arguments taking None as default will - be implicitly set on first query. - - :param binsha: 20 byte sha1 - :param parents: tuple( Commit, ... ) - is a tuple of commit ids or actual Commits - :param tree: Tree object - :param author: Actor - is the author Actor object + """Instantiate a new :class:`Commit`. All keyword arguments taking ``None`` as + default will be implicitly set on first query. + + :param binsha: + 20 byte sha1. + + :param tree: + A :class:`~git.objects.tree.Tree` object. + + :param author: + The author :class:`~git.util.Actor` object. + :param authored_date: int_seconds_since_epoch - is the authored DateTime - use time.gmtime() to convert it into a - different format + The authored DateTime - use :func:`time.gmtime` to convert it into a + different format. + :param author_tz_offset: int_seconds_west_of_utc - is the timezone that the authored_date is in - :param committer: Actor - is the committer string + The timezone that the `authored_date` is in. + + :param committer: + The committer string, as an :class:`~git.util.Actor` object. 
+ :param committed_date: int_seconds_since_epoch - is the committed DateTime - use time.gmtime() to convert it into a - different format + The committed DateTime - use :func:`time.gmtime` to convert it into a + different format. + :param committer_tz_offset: int_seconds_west_of_utc - is the timezone that the committed_date is in + The timezone that the `committed_date` is in. + :param message: string - is the commit message + The commit message. + :param encoding: string - encoding of the message, defaults to UTF-8 + Encoding of the message, defaults to UTF-8. + :param parents: - List or tuple of Commit objects which are our parent(s) in the commit - dependency graph - :return: git.Commit + List or tuple of :class:`Commit` objects which are our parent(s) in the + commit dependency graph. + + :return: + :class:`Commit` :note: - Timezone information is in the same format and in the same sign - as what time.altzone returns. The sign is inverted compared to git's - UTC timezone.""" - super(Commit, self).__init__(repo, binsha) + Timezone information is in the same format and in the same sign as what + :func:`time.altzone` returns. The sign is inverted compared to git's UTC + timezone. + """ + super().__init__(repo, binsha) self.binsha = binsha if tree is not None: assert isinstance(tree, Tree), "Tree needs to be a Tree instance, was %s" % type(tree) @@ -178,8 +204,11 @@ class Commit(base.Object, TraversableIterableObj, Diffable, Serializable): def _calculate_sha_(cls, repo: "Repo", commit: "Commit") -> bytes: """Calculate the sha of a commit. - :param repo: Repo object the commit should be part of - :param commit: Commit object for which to generate the sha + :param repo: + :class:`~git.repo.base.Repo` object the commit should be part of. + + :param commit: + :class:`Commit` object for which to generate the sha. 
""" stream = BytesIO() @@ -191,10 +220,10 @@ class Commit(base.Object, TraversableIterableObj, Diffable, Serializable): return istream.binsha def replace(self, **kwargs: Any) -> "Commit": - """Create new commit object from existing commit object. + """Create new commit object from an existing commit object. - Any values provided as keyword arguments will replace the - corresponding attribute in the new object. + Any values provided as keyword arguments will replace the corresponding + attribute in the new object. """ attrs = {k: getattr(self, k) for k in self.__slots__} @@ -211,11 +240,11 @@ class Commit(base.Object, TraversableIterableObj, Diffable, Serializable): def _set_cache_(self, attr: str) -> None: if attr in Commit.__slots__: - # read the data in a chunk, its faster - then provide a file wrapper + # Read the data in a chunk, its faster - then provide a file wrapper. _binsha, _typename, self.size, stream = self.repo.odb.stream(self.binsha) self._deserialize(BytesIO(stream.read())) else: - super(Commit, self)._set_cache_(attr) + super()._set_cache_(attr) # END handle attrs @property @@ -235,18 +264,22 @@ class Commit(base.Object, TraversableIterableObj, Diffable, Serializable): return self.message.split(b"\n", 1)[0] def count(self, paths: Union[PathLike, Sequence[PathLike]] = "", **kwargs: Any) -> int: - """Count the number of commits reachable from this commit + """Count the number of commits reachable from this commit. :param paths: - is an optional path or a list of paths restricting the return value - to commits actually containing the paths + An optional path or a list of paths restricting the return value to commits + actually containing the paths. :param kwargs: - Additional options to be passed to git-rev-list. 
They must not alter - the output style of the command, or parsing will yield incorrect results - :return: int defining the number of reachable commits""" - # yes, it makes a difference whether empty paths are given or not in our case - # as the empty paths version will ignore merge commits for some reason. + Additional options to be passed to :manpage:`git-rev-list(1)`. They must not + alter the output style of the command, or parsing will yield incorrect + results. + + :return: + An int defining the number of reachable commits + """ + # Yes, it makes a difference whether empty paths are given or not in our case as + # the empty paths version will ignore merge commits for some reason. if paths: return len(self.repo.git.rev_list(self.hexsha, "--", paths, **kwargs).splitlines()) return len(self.repo.git.rev_list(self.hexsha, **kwargs).splitlines()) @@ -255,37 +288,50 @@ class Commit(base.Object, TraversableIterableObj, Diffable, Serializable): def name_rev(self) -> str: """ :return: - String describing the commits hex sha based on the closest Reference. - Mostly useful for UI purposes""" + String describing the commits hex sha based on the closest + `~git.refs.reference.Reference`. + + :note: + Mostly useful for UI purposes. + """ return self.repo.git.name_rev(self) @classmethod def iter_items( cls, repo: "Repo", - rev: Union[str, "Commit", "SymbolicReference"], # type: ignore + rev: Union[str, "Commit", "SymbolicReference"], paths: Union[PathLike, Sequence[PathLike]] = "", **kwargs: Any, ) -> Iterator["Commit"]: - """Find all commits matching the given criteria. + R"""Find all commits matching the given criteria. + + :param repo: + The :class:`~git.repo.base.Repo`. + + :param rev: + Revision specifier. See :manpage:`git-rev-parse(1)` for viable options. 
- :param repo: is the Repo - :param rev: revision specifier, see git-rev-parse for viable options :param paths: - is an optional path or list of paths, if set only Commits that include the path - or paths will be considered + An optional path or list of paths. If set only :class:`Commit`\s that + include the path or paths will be considered. + :param kwargs: - optional keyword arguments to git rev-list where - ``max_count`` is the maximum number of commits to fetch - ``skip`` is the number of commits to skip - ``since`` all commits since i.e. '1970-01-01' - :return: iterator yielding Commit items""" + Optional keyword arguments to :manpage:`git-rev-list(1)` where: + + * ``max_count`` is the maximum number of commits to fetch. + * ``skip`` is the number of commits to skip. + * ``since`` selects all commits since some date, e.g. ``"1970-01-01"``. + + :return: + Iterator yielding :class:`Commit` items. + """ if "pretty" in kwargs: raise ValueError("--pretty cannot be used as parsing expects single sha's only") # END handle pretty - # use -- in any case, to prevent possibility of ambiguous arguments - # see https://github.com/gitpython-developers/GitPython/issues/264 + # Use -- in all cases, to prevent possibility of ambiguous arguments. + # See https://github.com/gitpython-developers/GitPython/issues/264. args_list: List[PathLike] = ["--"] @@ -303,13 +349,18 @@ class Commit(base.Object, TraversableIterableObj, Diffable, Serializable): return cls._iter_from_process_or_stream(repo, proc) def iter_parents(self, paths: Union[PathLike, Sequence[PathLike]] = "", **kwargs: Any) -> Iterator["Commit"]: - """Iterate _all_ parents of this commit. + R"""Iterate _all_ parents of this commit. 
:param paths: - Optional path or list of paths limiting the Commits to those that - contain at least one of the paths - :param kwargs: All arguments allowed by git-rev-list - :return: Iterator yielding Commit objects which are parents of self""" + Optional path or list of paths limiting the :class:`Commit`\s to those that + contain at least one of the paths. + + :param kwargs: + All arguments allowed by :manpage:`git-rev-list(1)`. + + :return: + Iterator yielding :class:`Commit` objects which are parents of ``self`` + """ # skip ourselves skip = kwargs.get("skip", 1) if skip == 0: # skip ourselves @@ -323,39 +374,58 @@ class Commit(base.Object, TraversableIterableObj, Diffable, Serializable): """Create a git stat from changes between this commit and its first parent or from all changes done if this is the very first commit. - :return: git.Stats""" - if not self.parents: - text = self.repo.git.diff_tree(self.hexsha, "--", numstat=True, no_renames=True, root=True) - text2 = "" - for line in text.splitlines()[1:]: + :return: + :class:`Stats` + """ + + def process_lines(lines: List[str]) -> str: + text = "" + for file_info, line in zip(lines, lines[len(lines) // 2 :]): + change_type = file_info.split("\t")[0][-1] (insertions, deletions, filename) = line.split("\t") - text2 += "%s\t%s\t%s\n" % (insertions, deletions, filename) - text = text2 + text += "%s\t%s\t%s\t%s\n" % (change_type, insertions, deletions, filename) + return text + + if not self.parents: + lines = self.repo.git.diff_tree( + self.hexsha, "--", numstat=True, no_renames=True, root=True, raw=True + ).splitlines()[1:] + text = process_lines(lines) else: - text = self.repo.git.diff(self.parents[0].hexsha, self.hexsha, "--", numstat=True, no_renames=True) + lines = self.repo.git.diff( + self.parents[0].hexsha, self.hexsha, "--", numstat=True, no_renames=True, raw=True + ).splitlines() + text = process_lines(lines) return Stats._list_from_string(self.repo, text) @property def trailers(self) -> Dict[str, str]: 
- """Get the trailers of the message as a dictionary + """Deprecated. Get the trailers of the message as a dictionary. - :note: This property is deprecated, please use either ``Commit.trailers_list`` or ``Commit.trailers_dict``. + :note: + This property is deprecated, please use either :attr:`trailers_list` or + :attr:`trailers_dict`. :return: Dictionary containing whitespace stripped trailer information. Only contains the latest instance of each trailer key. """ + warnings.warn( + "Commit.trailers is deprecated, use Commit.trailers_list or Commit.trailers_dict instead", + DeprecationWarning, + stacklevel=2, + ) return {k: v[0] for k, v in self.trailers_dict.items()} @property def trailers_list(self) -> List[Tuple[str, str]]: - """Get the trailers of the message as a list + """Get the trailers of the message as a list. - Git messages can contain trailer information that are similar to RFC 822 - e-mail headers (see: https://git-scm.com/docs/git-interpret-trailers). + Git messages can contain trailer information that are similar to :rfc:`822` + e-mail headers. See :manpage:`git-interpret-trailers(1)`. - This functions calls ``git interpret-trailers --parse`` onto the message - to extract the trailer information, returns the raw trailer data as a list. + This function calls ``git interpret-trailers --parse`` onto the message to + extract the trailer information, returns the raw trailer data as a list. Valid message with trailer:: @@ -369,7 +439,6 @@ class Commit(base.Object, TraversableIterableObj, Diffable, Serializable): key1: value1.2 key2 : value 2 with inner spaces - Returned list will look like this:: [ @@ -378,12 +447,15 @@ class Commit(base.Object, TraversableIterableObj, Diffable, Serializable): ("key2", "value 2 with inner spaces"), ] - :return: List containing key-value tuples of whitespace stripped trailer information. 
""" cmd = ["git", "interpret-trailers", "--parse"] - proc: Git.AutoInterrupt = self.repo.git.execute(cmd, as_process=True, istream=PIPE) # type: ignore + proc: Git.AutoInterrupt = self.repo.git.execute( # type: ignore[call-overload] + cmd, + as_process=True, + istream=PIPE, + ) trailer: str = proc.communicate(str(self.message).encode())[0].decode("utf8") trailer = trailer.strip() @@ -399,14 +471,14 @@ class Commit(base.Object, TraversableIterableObj, Diffable, Serializable): @property def trailers_dict(self) -> Dict[str, List[str]]: - """Get the trailers of the message as a dictionary + """Get the trailers of the message as a dictionary. - Git messages can contain trailer information that are similar to RFC 822 - e-mail headers (see: https://git-scm.com/docs/git-interpret-trailers). + Git messages can contain trailer information that are similar to :rfc:`822` + e-mail headers. See :manpage:`git-interpret-trailers(1)`. - This functions calls ``git interpret-trailers --parse`` onto the message - to extract the trailer information. The key value pairs are stripped of - leading and trailing whitespaces before they get saved into a dictionary. + This function calls ``git interpret-trailers --parse`` onto the message to + extract the trailer information. The key value pairs are stripped of leading and + trailing whitespaces before they get saved into a dictionary. Valid message with trailer:: @@ -420,7 +492,6 @@ class Commit(base.Object, TraversableIterableObj, Diffable, Serializable): key1: value1.2 key2 : value 2 with inner spaces - Returned dictionary will look like this:: { @@ -430,8 +501,8 @@ class Commit(base.Object, TraversableIterableObj, Diffable, Serializable): :return: - Dictionary containing whitespace stripped trailer information. - Mapping trailer keys to a list of their corresponding values. + Dictionary containing whitespace stripped trailer information, mapping + trailer keys to a list of their corresponding values. 
""" d = defaultdict(list) for key, val in self.trailers_list: @@ -440,12 +511,17 @@ class Commit(base.Object, TraversableIterableObj, Diffable, Serializable): @classmethod def _iter_from_process_or_stream(cls, repo: "Repo", proc_or_stream: Union[Popen, IO]) -> Iterator["Commit"]: - """Parse out commit information into a list of Commit objects - We expect one-line per commit, and parse the actual commit information directly - from our lighting fast object database + """Parse out commit information into a list of :class:`Commit` objects. + + We expect one line per commit, and parse the actual commit information directly + from our lighting fast object database. + + :param proc: + :manpage:`git-rev-list(1)` process instance - one sha per line. - :param proc: git-rev-list process instance - one sha per line - :return: iterator returning Commit objects""" + :return: + Iterator supplying :class:`Commit` objects + """ # def is_proc(inp) -> TypeGuard[Popen]: # return hasattr(proc_or_stream, 'wait') and not hasattr(proc_or_stream, 'readline') @@ -458,7 +534,7 @@ class Commit(base.Object, TraversableIterableObj, Diffable, Serializable): if proc_or_stream.stdout is not None: stream = proc_or_stream.stdout elif hasattr(proc_or_stream, "readline"): - proc_or_stream = cast(IO, proc_or_stream) # type: ignore [redundant-cast] + proc_or_stream = cast(IO, proc_or_stream) # type: ignore[redundant-cast] stream = proc_or_stream readline = stream.readline @@ -468,15 +544,16 @@ class Commit(base.Object, TraversableIterableObj, Diffable, Serializable): break hexsha = line.strip() if len(hexsha) > 40: - # split additional information, as returned by bisect for instance + # Split additional information, as returned by bisect for instance. 
hexsha, _ = line.split(None, 1) # END handle extra info assert len(hexsha) == 40, "Invalid line: %s" % hexsha yield cls(repo, hex_to_bin(hexsha)) # END for each line in stream - # TODO: Review this - it seems process handling got a bit out of control - # due to many developers trying to fix the open file handles issue + + # TODO: Review this - it seems process handling got a bit out of control due to + # many developers trying to fix the open file handles issue. if hasattr(proc_or_stream, "wait"): proc_or_stream = cast(Popen, proc_or_stream) finalize_process(proc_or_stream) @@ -494,55 +571,70 @@ class Commit(base.Object, TraversableIterableObj, Diffable, Serializable): author_date: Union[None, str, datetime.datetime] = None, commit_date: Union[None, str, datetime.datetime] = None, ) -> "Commit": - """Commit the given tree, creating a commit object. + """Commit the given tree, creating a :class:`Commit` object. + + :param repo: + :class:`~git.repo.base.Repo` object the commit should be part of. + + :param tree: + :class:`~git.objects.tree.Tree` object or hex or bin sha. + The tree of the new commit. + + :param message: + Commit message. It may be an empty string if no message is provided. It will + be converted to a string, in any case. - :param repo: Repo object the commit should be part of - :param tree: Tree object or hex or bin sha - the tree of the new commit - :param message: Commit message. It may be an empty string if no message is provided. - It will be converted to a string , in any case. :param parent_commits: - Optional Commit objects to use as parents for the new commit. - If empty list, the commit will have no parents at all and become - a root commit. - If None , the current head commit will be the parent of the - new commit object + Optional :class:`Commit` objects to use as parents for the new commit. If + empty list, the commit will have no parents at all and become a root commit. 
+ If ``None``, the current head commit will be the parent of the new commit + object. + :param head: - If True, the HEAD will be advanced to the new commit automatically. - Else the HEAD will remain pointing on the previous commit. This could + If ``True``, the HEAD will be advanced to the new commit automatically. + Otherwise the HEAD will remain pointing on the previous commit. This could lead to undesired results when diffing files. - :param author: The name of the author, optional. If unset, the repository - configuration is used to obtain this value. - :param committer: The name of the committer, optional. If unset, the - repository configuration is used to obtain this value. - :param author_date: The timestamp for the author field - :param commit_date: The timestamp for the committer field - :return: Commit object representing the new commit + :param author: + The name of the author, optional. + If unset, the repository configuration is used to obtain this value. + + :param committer: + The name of the committer, optional. + If unset, the repository configuration is used to obtain this value. + + :param author_date: + The timestamp for the author field. + + :param commit_date: + The timestamp for the committer field. + + :return: + :class:`Commit` object representing the new commit. :note: - Additional information about the committer and Author are taken from the - environment or from the git configuration, see git-commit-tree for - more information""" + Additional information about the committer and author are taken from the + environment or from the git configuration. See :manpage:`git-commit-tree(1)` + for more information. + """ if parent_commits is None: try: parent_commits = [repo.head.commit] except ValueError: - # empty repositories have no head commit + # Empty repositories have no head commit. 
parent_commits = [] # END handle parent commits else: for p in parent_commits: if not isinstance(p, cls): raise ValueError(f"Parent commit '{p!r}' must be of type {cls}") - # end check parent commit types + # END check parent commit types # END if parent commits are unset - # retrieve all additional information, create a commit object, and - # serialize it + # Retrieve all additional information, create a commit object, and serialize it. # Generally: - # * Environment variables override configuration values - # * Sensible defaults are set according to the git documentation + # * Environment variables override configuration values. + # * Sensible defaults are set according to the git documentation. # COMMITTER AND AUTHOR INFO cr = repo.config_reader() @@ -574,14 +666,14 @@ class Commit(base.Object, TraversableIterableObj, Diffable, Serializable): committer_time, committer_offset = unix_time, offset # END set committer time - # assume utf8 encoding + # Assume UTF-8 encoding. enc_section, enc_option = cls.conf_encoding.split(".") conf_encoding = cr.get_value(enc_section, enc_option, cls.default_encoding) if not isinstance(conf_encoding, str): raise TypeError("conf_encoding could not be coerced to str") - # if the tree is no object, make sure we create one - otherwise - # the created commit object is invalid + # If the tree is no object, make sure we create one - otherwise the created + # commit object is invalid. if isinstance(tree, str): tree = repo.tree(tree) # END tree conversion @@ -605,15 +697,15 @@ class Commit(base.Object, TraversableIterableObj, Diffable, Serializable): new_commit.binsha = cls._calculate_sha_(repo, new_commit) if head: - # need late import here, importing git at the very beginning throws - # as well ... + # Need late import here, importing git at the very beginning throws as + # well... 
import git.refs try: repo.head.set_commit(new_commit, logmsg=message) except ValueError: - # head is not yet set to the ref our HEAD points to - # Happens on first commit + # head is not yet set to the ref our HEAD points to. + # Happens on first commit. master = git.refs.Head.create( repo, repo.head.ref, @@ -651,7 +743,7 @@ class Commit(base.Object, TraversableIterableObj, Diffable, Serializable): ).encode(self.encoding) ) - # encode committer + # Encode committer. aname = c.name write( ( @@ -679,7 +771,7 @@ class Commit(base.Object, TraversableIterableObj, Diffable, Serializable): write(b"\n") - # write plain bytes, be sure its encoded according to our encoding + # Write plain bytes, be sure its encoded according to our encoding. if isinstance(self.message, str): write(self.message.encode(self.encoding)) else: @@ -688,10 +780,6 @@ class Commit(base.Object, TraversableIterableObj, Diffable, Serializable): return self def _deserialize(self, stream: BytesIO) -> "Commit": - """ - :param from_rev_list: if true, the stream format is coming from the rev-list command - Otherwise it is assumed to be a plain data stream from our object - """ readline = stream.readline self.tree = Tree(self.repo, hex_to_bin(readline().split()[1]), Tree.tree_id << 12, "") @@ -707,24 +795,25 @@ class Commit(base.Object, TraversableIterableObj, Diffable, Serializable): # END for each parent line self.parents = tuple(self.parents) - # we don't know actual author encoding before we have parsed it, so keep the lines around + # We don't know actual author encoding before we have parsed it, so keep the + # lines around. author_line = next_line committer_line = readline() - # we might run into one or more mergetag blocks, skip those for now + # We might run into one or more mergetag blocks, skip those for now. 
next_line = readline() while next_line.startswith(b"mergetag "): next_line = readline() while next_line.startswith(b" "): next_line = readline() - # end skip mergetags + # END skip mergetags - # now we can have the encoding line, or an empty line followed by the optional + # Now we can have the encoding line, or an empty line followed by the optional # message. self.encoding = self.default_encoding self.gpgsig = "" - # read headers + # Read headers. enc = next_line buf = enc.strip() while buf: @@ -742,13 +831,13 @@ class Commit(base.Object, TraversableIterableObj, Diffable, Serializable): is_next_header = True break sig += sigbuf[1:] - # end read all signature + # END read all signature self.gpgsig = sig.rstrip(b"\n").decode(self.encoding, "ignore") if is_next_header: continue buf = readline().strip() - # decode the authors name + # Decode the author's name. try: ( self.author, @@ -756,7 +845,7 @@ class Commit(base.Object, TraversableIterableObj, Diffable, Serializable): self.author_tz_offset, ) = parse_actor_and_date(author_line.decode(self.encoding, "replace")) except UnicodeDecodeError: - log.error( + _logger.error( "Failed to decode author line '%s' using encoding %s", author_line, self.encoding, @@ -770,7 +859,7 @@ class Commit(base.Object, TraversableIterableObj, Diffable, Serializable): self.committer_tz_offset, ) = parse_actor_and_date(committer_line.decode(self.encoding, "replace")) except UnicodeDecodeError: - log.error( + _logger.error( "Failed to decode committer line '%s' using encoding %s", committer_line, self.encoding, @@ -778,13 +867,13 @@ class Commit(base.Object, TraversableIterableObj, Diffable, Serializable): ) # END handle author's encoding - # a stream from our data simply gives us the plain message - # The end of our message stream is marked with a newline that we strip + # A stream from our data simply gives us the plain message. + # The end of our message stream is marked with a newline that we strip. 
self.message = stream.read() try: self.message = self.message.decode(self.encoding, "replace") except UnicodeDecodeError: - log.error( + _logger.error( "Failed to decode message '%s' using encoding %s", self.message, self.encoding, @@ -798,11 +887,13 @@ class Commit(base.Object, TraversableIterableObj, Diffable, Serializable): @property def co_authors(self) -> List[Actor]: - """ - Search the commit message for any co-authors of this commit. - Details on co-authors: https://github.blog/2018-01-29-commit-together-with-co-authors/ + """Search the commit message for any co-authors of this commit. + + Details on co-authors: + https://github.blog/2018-01-29-commit-together-with-co-authors/ - :return: List of co-authors for this commit (as Actor objects). + :return: + List of co-authors for this commit (as :class:`~git.util.Actor` objects). """ co_authors = [] diff --git a/git/objects/fun.py b/git/objects/fun.py index 043eec7..fe57da1 100644 --- a/git/objects/fun.py +++ b/git/objects/fun.py @@ -1,6 +1,16 @@ -"""Module with functions which are supposed to be as fast as possible""" -from stat import S_ISDIR +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ +"""Functions that are supposed to be as fast as possible.""" + +__all__ = [ + "tree_to_stream", + "tree_entries_from_data", + "traverse_trees_recursive", + "traverse_tree_recursive", +] + +from stat import S_ISDIR from git.compat import safe_decode, defenc @@ -19,29 +29,26 @@ from typing import ( if TYPE_CHECKING: from _typeshed import ReadableBuffer + from git import GitCmdObjectDB -EntryTup = Tuple[bytes, int, str] # same as TreeCacheTup in tree.py +EntryTup = Tuple[bytes, int, str] # Same as TreeCacheTup in tree.py. 
EntryTupOrNone = Union[EntryTup, None] # --------------------------------------------------- -__all__ = ( - "tree_to_stream", - "tree_entries_from_data", - "traverse_trees_recursive", - "traverse_tree_recursive", -) - - def tree_to_stream(entries: Sequence[EntryTup], write: Callable[["ReadableBuffer"], Union[int, None]]) -> None: - """Write the give list of entries into a stream using its write method + """Write the given list of entries into a stream using its ``write`` method. + + :param entries: + **Sorted** list of tuples with (binsha, mode, name). - :param entries: **sorted** list of tuples with (binsha, mode, name) - :param write: write method which takes a data string""" + :param write: + A ``write`` method which takes a data string. + """ ord_zero = ord("0") - bit_mask = 7 # 3 bits set + bit_mask = 7 # 3 bits set. for binsha, mode, name in entries: mode_str = b"" @@ -49,16 +56,16 @@ def tree_to_stream(entries: Sequence[EntryTup], write: Callable[["ReadableBuffer mode_str = bytes([((mode >> (i * 3)) & bit_mask) + ord_zero]) + mode_str # END for each 8 octal value - # git slices away the first octal if its zero + # git slices away the first octal if it's zero. if mode_str[0] == ord_zero: mode_str = mode_str[1:] # END save a byte - # here it comes: if the name is actually unicode, the replacement below - # will not work as the binsha is not part of the ascii unicode encoding - - # hence we must convert to an utf8 string for it to work properly. - # According to my tests, this is exactly what git does, that is it just - # takes the input literally, which appears to be utf8 on linux. + # Here it comes: If the name is actually unicode, the replacement below will not + # work as the binsha is not part of the ascii unicode encoding - hence we must + # convert to an UTF-8 string for it to work properly. According to my tests, + # this is exactly what git does, that is it just takes the input literally, + # which appears to be UTF-8 on linux. 
if isinstance(name, str): name_bytes = name.encode(defenc) else: @@ -68,10 +75,15 @@ def tree_to_stream(entries: Sequence[EntryTup], write: Callable[["ReadableBuffer def tree_entries_from_data(data: bytes) -> List[EntryTup]: - """Reads the binary representation of a tree and returns tuples of Tree items + """Read the binary representation of a tree and returns tuples of + :class:`~git.objects.tree.Tree` items. + + :param data: + Data block with tree data (as bytes). - :param data: data block with tree data (as bytes) - :return: list(tuple(binsha, mode, tree_relative_path), ...)""" + :return: + list(tuple(binsha, mode, tree_relative_path), ...) + """ ord_zero = ord("0") space_ord = ord(" ") len_data = len(data) @@ -80,32 +92,32 @@ def tree_entries_from_data(data: bytes) -> List[EntryTup]: while i < len_data: mode = 0 - # read mode - # Some git versions truncate the leading 0, some don't - # The type will be extracted from the mode later + # Read Mode + # Some git versions truncate the leading 0, some don't. + # The type will be extracted from the mode later. while data[i] != space_ord: - # move existing mode integer up one level being 3 bits - # and add the actual ordinal value of the character + # Move existing mode integer up one level being 3 bits and add the actual + # ordinal value of the character. mode = (mode << 3) + (data[i] - ord_zero) i += 1 # END while reading mode - # byte is space now, skip it + # Byte is space now, skip it. i += 1 - # parse name, it is NULL separated + # Parse name, it is NULL separated. ns = i while data[i] != 0: i += 1 # END while not reached NULL - # default encoding for strings in git is utf8 - # Only use the respective unicode object if the byte stream was encoded + # Default encoding for strings in git is UTF-8. + # Only use the respective unicode object if the byte stream was encoded. name_bytes = data[ns:i] name = safe_decode(name_bytes) - # byte is NULL, get next 20 + # Byte is NULL, get next 20. 
i += 1 sha = data[i : i + 20] i = i + 20 @@ -115,10 +127,11 @@ def tree_entries_from_data(data: bytes) -> List[EntryTup]: def _find_by_name(tree_data: MutableSequence[EntryTupOrNone], name: str, is_dir: bool, start_at: int) -> EntryTupOrNone: - """return data entry matching the given name and tree mode - or None. - Before the item is returned, the respective data item is set - None in the tree_data list to mark it done""" + """Return data entry matching the given name and tree mode or ``None``. + + Before the item is returned, the respective data item is set None in the `tree_data` + list to mark it done. + """ try: item = tree_data[start_at] @@ -138,17 +151,15 @@ def _find_by_name(tree_data: MutableSequence[EntryTupOrNone], name: str, is_dir: @overload -def _to_full_path(item: None, path_prefix: str) -> None: - ... +def _to_full_path(item: None, path_prefix: str) -> None: ... @overload -def _to_full_path(item: EntryTup, path_prefix: str) -> EntryTup: - ... +def _to_full_path(item: EntryTup, path_prefix: str) -> EntryTup: ... def _to_full_path(item: EntryTupOrNone, path_prefix: str) -> EntryTupOrNone: - """Rebuild entry with given path prefix""" + """Rebuild entry with given path prefix.""" if not item: return item return (item[0], item[1], path_prefix + item[2]) @@ -158,19 +169,30 @@ def traverse_trees_recursive( odb: "GitCmdObjectDB", tree_shas: Sequence[Union[bytes, None]], path_prefix: str ) -> List[Tuple[EntryTupOrNone, ...]]: """ - :return: list of list with entries according to the given binary tree-shas. + :return: + List of list with entries according to the given binary tree-shas. + The result is encoded in a list - of n tuple|None per blob/commit, (n == len(tree_shas)), where + of n tuple|None per blob/commit, (n == len(tree_shas)), where: + * [0] == 20 byte sha * [1] == mode as int * [2] == path relative to working tree root - The entry tuple is None if the respective blob/commit did not - exist in the given tree. 
- :param tree_shas: iterable of shas pointing to trees. All trees must - be on the same level. A tree-sha may be None in which case None - :param path_prefix: a prefix to be added to the returned paths on this level, - set it '' for the first iteration - :note: The ordering of the returned items will be partially lost""" + + The entry tuple is ``None`` if the respective blob/commit did not exist in the + given tree. + + :param tree_shas: + Iterable of shas pointing to trees. All trees must be on the same level. + A tree-sha may be ``None``, in which case ``None``. + + :param path_prefix: + A prefix to be added to the returned paths on this level. + Set it ``""`` for the first iteration. + + :note: + The ordering of the returned items will be partially lost. + """ trees_data: List[List[EntryTupOrNone]] = [] nt = len(tree_shas) @@ -178,7 +200,7 @@ def traverse_trees_recursive( if tree_sha is None: data: List[EntryTupOrNone] = [] else: - # make new list for typing as list invariant + # Make new list for typing as list invariant. data = list(tree_entries_from_data(odb.stream(tree_sha).read())) # END handle muted trees trees_data.append(data) @@ -186,9 +208,9 @@ def traverse_trees_recursive( out: List[Tuple[EntryTupOrNone, ...]] = [] - # find all matching entries and recursively process them together if the match - # is a tree. If the match is a non-tree item, put it into the result. - # Processed items will be set None + # Find all matching entries and recursively process them together if the match is a + # tree. If the match is a non-tree item, put it into the result. + # Processed items will be set None. 
for ti, tree_data in enumerate(trees_data): for ii, item in enumerate(tree_data): if not item: @@ -198,17 +220,17 @@ def traverse_trees_recursive( entries = [None for _ in range(nt)] entries[ti] = item _sha, mode, name = item - is_dir = S_ISDIR(mode) # type mode bits + is_dir = S_ISDIR(mode) # Type mode bits - # find this item in all other tree data items - # wrap around, but stop one before our current index, hence - # ti+nt, not ti+1+nt + # Find this item in all other tree data items. + # Wrap around, but stop one before our current index, hence ti+nt, not + # ti+1+nt. for tio in range(ti + 1, ti + nt): tio = tio % nt entries[tio] = _find_by_name(trees_data[tio], name, is_dir, ii) # END for each other item data - # if we are a directory, enter recursion + # If we are a directory, enter recursion. if is_dir: out.extend( traverse_trees_recursive( @@ -221,11 +243,11 @@ def traverse_trees_recursive( out.append(tuple(_to_full_path(e, path_prefix) for e in entries)) # END handle recursion - # finally mark it done + # Finally mark it done. tree_data[ii] = None # END for each item - # we are done with one tree, set all its data empty + # We are done with one tree, set all its data empty. del tree_data[:] # END for each tree_data chunk return out @@ -233,16 +255,22 @@ def traverse_trees_recursive( def traverse_tree_recursive(odb: "GitCmdObjectDB", tree_sha: bytes, path_prefix: str) -> List[EntryTup]: """ - :return: list of entries of the tree pointed to by the binary tree_sha. An entry - has the following format: + :return: + List of entries of the tree pointed to by the binary `tree_sha`. + + An entry has the following format: + * [0] 20 byte sha * [1] mode as int * [2] path relative to the repository - :param path_prefix: prefix to prepend to the front of all returned paths""" + + :param path_prefix: + Prefix to prepend to the front of all returned paths. 
+ """ entries = [] data = tree_entries_from_data(odb.stream(tree_sha).read()) - # unpacking/packing is faster than accessing individual items + # Unpacking/packing is faster than accessing individual items. for sha, mode, name in data: if S_ISDIR(mode): entries.extend(traverse_tree_recursive(odb, sha, path_prefix + name + "/")) diff --git a/git/objects/submodule/__init__.py b/git/objects/submodule/__init__.py index 82df59b..c0604e7 100644 --- a/git/objects/submodule/__init__.py +++ b/git/objects/submodule/__init__.py @@ -1,2 +1,7 @@ -# NOTE: Cannot import anything here as the top-level _init_ has to handle -# our dependencies +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ + +__all__ = ["Submodule", "UpdateProgress", "RootModule", "RootUpdateProgress"] + +from .base import Submodule, UpdateProgress +from .root import RootModule, RootUpdateProgress diff --git a/git/objects/submodule/base.py b/git/objects/submodule/base.py index c7e7856..fa60bcd 100644 --- a/git/objects/submodule/base.py +++ b/git/objects/submodule/base.py @@ -1,72 +1,82 @@ -# need a dict to set bloody .name field +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ + +__all__ = ["Submodule", "UpdateProgress"] + +import gc from io import BytesIO import logging import os +import os.path as osp import stat +import sys import uuid import git from git.cmd import Git -from git.compat import ( - defenc, - is_win, -) -from git.config import SectionConstraint, GitConfigParser, cp +from git.compat import defenc +from git.config import GitConfigParser, SectionConstraint, cp from git.exc import ( + BadName, InvalidGitRepositoryError, NoSuchPathError, RepositoryDirtyError, - BadName, ) from git.objects.base import IndexObject, Object from git.objects.util import TraversableIterableObj - from git.util import ( - join_path_native, - to_native_path_linux, 
+ IterableList, RemoteProgress, + join_path_native, rmtree, + to_native_path_linux, unbare_repo, - IterableList, ) -from git.util import HIDE_WINDOWS_KNOWN_ERRORS - -import os.path as osp from .util import ( + SubmoduleConfigParser, + find_first_remote_branch, mkhead, sm_name, sm_section, - SubmoduleConfigParser, - find_first_remote_branch, ) - # typing ---------------------------------------------------------------------- -from typing import Callable, Dict, Mapping, Sequence, TYPE_CHECKING, cast -from typing import Any, Iterator, Union -from git.types import Commit_ish, Literal, PathLike, TBD +from typing import ( + Any, + Callable, + Dict, + Iterator, + Mapping, + Sequence, + TYPE_CHECKING, + Union, + cast, +) + +if sys.version_info >= (3, 8): + from typing import Literal +else: + from typing_extensions import Literal + +from git.types import Commit_ish, PathLike, TBD if TYPE_CHECKING: from git.index import IndexFile - from git.repo import Repo + from git.objects.commit import Commit from git.refs import Head - + from git.repo import Repo # ----------------------------------------------------------------------------- -__all__ = ["Submodule", "UpdateProgress"] - - -log = logging.getLogger("git.objects.submodule.base") -log.addHandler(logging.NullHandler()) +_logger = logging.getLogger(__name__) class UpdateProgress(RemoteProgress): - """Class providing detailed progress information to the caller who should - derive from it and implement the ``update(...)`` message""" + derive from it and implement the + :meth:`update(...) 
<git.util.RemoteProgress.update>` message.""" CLONE, FETCH, UPDWKTREE = [1 << x for x in range(RemoteProgress._num_op_codes, RemoteProgress._num_op_codes + 3)] _num_op_codes: int = RemoteProgress._num_op_codes + 3 @@ -81,29 +91,32 @@ FETCH = UpdateProgress.FETCH UPDWKTREE = UpdateProgress.UPDWKTREE -# IndexObject comes via util module, its a 'hacky' fix thanks to pythons import -# mechanism which cause plenty of trouble of the only reason for packages and -# modules is refactoring - subpackages shouldn't depend on parent packages +# IndexObject comes via the util module. It's a 'hacky' fix thanks to Python's import +# mechanism, which causes plenty of trouble if the only reason for packages and modules +# is refactoring - subpackages shouldn't depend on parent packages. class Submodule(IndexObject, TraversableIterableObj): - """Implements access to a git submodule. They are special in that their sha represents a commit in the submodule's repository which is to be checked out at the path of this instance. + The submodule type does not have a string type associated with it, as it exists solely as a marker in the tree and index. - All methods work in bare and non-bare repositories.""" + All methods work in bare and non-bare repositories. + """ _id_attribute_ = "name" k_modules_file = ".gitmodules" k_head_option = "branch" k_head_default = "master" - k_default_mode = stat.S_IFDIR | stat.S_IFLNK # submodules are directories with link-status + k_default_mode = stat.S_IFDIR | stat.S_IFLNK + """Submodule flags. 
Submodules are directories with link-status.""" - # this is a bogus type for base class compatibility - type: Literal["submodule"] = "submodule" # type: ignore + type: Literal["submodule"] = "submodule" # type: ignore[assignment] + """This is a bogus type string for base class compatibility.""" __slots__ = ("_parent_commit", "_url", "_branch_path", "_name", "__weakref__") + _cache_attrs = ("path", "_url", "_branch_path") def __init__( @@ -113,25 +126,40 @@ class Submodule(IndexObject, TraversableIterableObj): mode: Union[int, None] = None, path: Union[PathLike, None] = None, name: Union[str, None] = None, - parent_commit: Union[Commit_ish, None] = None, + parent_commit: Union["Commit", None] = None, url: Union[str, None] = None, branch_path: Union[PathLike, None] = None, ) -> None: - """Initialize this instance with its attributes. We only document the ones - that differ from ``IndexObject`` - - :param repo: Our parent repository - :param binsha: binary sha referring to a commit in the remote repository, see url parameter - :param parent_commit: see set_parent_commit() - :param url: The url to the remote repository which is the submodule - :param branch_path: full (relative) path to ref to checkout when cloning the remote repository""" - super(Submodule, self).__init__(repo, binsha, mode, path) + """Initialize this instance with its attributes. + + We only document the parameters that differ from + :class:`~git.objects.base.IndexObject`. + + :param repo: + Our parent repository. + + :param binsha: + Binary sha referring to a commit in the remote repository. + See the `url` parameter. + + :param parent_commit: + The :class:`~git.objects.commit.Commit` whose tree is supposed to contain + the ``.gitmodules`` blob, or ``None`` to always point to the most recent + commit. See :meth:`set_parent_commit` for details. + + :param url: + The URL to the remote repository which is the submodule. 
+ + :param branch_path: + Full repository-relative path to ref to checkout when cloning the remote + repository. + """ + super().__init__(repo, binsha, mode, path) self.size = 0 self._parent_commit = parent_commit if url is not None: self._url = url if branch_path is not None: - # assert isinstance(branch_path, str) self._branch_path = branch_path if name is not None: self._name = name @@ -139,7 +167,7 @@ class Submodule(IndexObject, TraversableIterableObj): def _set_cache_(self, attr: str) -> None: if attr in ("path", "_url", "_branch_path"): reader: SectionConstraint = self.config_reader() - # default submodule values + # Default submodule values. try: self.path = reader.get("path") except cp.NoSectionError as e: @@ -148,19 +176,19 @@ class Submodule(IndexObject, TraversableIterableObj): "This submodule instance does not exist anymore in '%s' file" % osp.join(self.repo.working_tree_dir, ".gitmodules") ) from e - # end + self._url = reader.get("url") - # git-python extension values - optional + # GitPython extension values - optional. 
self._branch_path = reader.get_value(self.k_head_option, git.Head.to_full_path(self.k_head_default)) elif attr == "_name": raise AttributeError("Cannot retrieve the name of a submodule if it was not set initially") else: - super(Submodule, self)._set_cache_(attr) + super()._set_cache_(attr) # END handle attribute name @classmethod def _get_intermediate_items(cls, item: "Submodule") -> IterableList["Submodule"]: - """:return: all the submodules of our module repository""" + """:return: All the submodules of our module repository""" try: return cls.list_items(item.module()) except InvalidGitRepositoryError: @@ -172,18 +200,18 @@ class Submodule(IndexObject, TraversableIterableObj): return git.version_info[:3] >= (1, 7, 5) def __eq__(self, other: Any) -> bool: - """Compare with another submodule""" - # we may only compare by name as this should be the ID they are hashed with - # Otherwise this type wouldn't be hashable - # return self.path == other.path and self.url == other.url and super(Submodule, self).__eq__(other) + """Compare with another submodule.""" + # We may only compare by name as this should be the ID they are hashed with. + # Otherwise this type wouldn't be hashable. 
+ # return self.path == other.path and self.url == other.url and super().__eq__(other) return self._name == other._name def __ne__(self, other: object) -> bool: - """Compare with another submodule for inequality""" + """Compare with another submodule for inequality.""" return not (self == other) def __hash__(self) -> int: - """Hash this instance using its logical id, not the sha""" + """Hash this instance using its logical id, not the sha.""" return hash(self._name) def __str__(self) -> str: @@ -200,20 +228,26 @@ class Submodule(IndexObject, TraversableIterableObj): @classmethod def _config_parser( - cls, repo: "Repo", parent_commit: Union[Commit_ish, None], read_only: bool + cls, repo: "Repo", parent_commit: Union["Commit", None], read_only: bool ) -> SubmoduleConfigParser: - """:return: Config Parser constrained to our submodule in read or write mode - :raise IOError: If the .gitmodules file cannot be found, either locally or in the repository - at the given parent commit. Otherwise the exception would be delayed until the first - access of the config parser""" + """ + :return: + Config parser constrained to our submodule in read or write mode + + :raise IOError: + If the ``.gitmodules`` file cannot be found, either locally or in the + repository at the given parent commit. Otherwise the exception would be + delayed until the first access of the config parser. + """ parent_matches_head = True if parent_commit is not None: try: parent_matches_head = repo.head.commit == parent_commit except ValueError: - # We are most likely in an empty repository, so the HEAD doesn't point to a valid ref + # We are most likely in an empty repository, so the HEAD doesn't point + # to a valid ref. 
pass - # end handle parent_commit + # END handle parent_commit fp_module: Union[str, BytesIO] if not repo.bare and parent_matches_head and repo.working_tree_dir: fp_module = osp.join(repo.working_tree_dir, cls.k_modules_file) @@ -235,7 +269,7 @@ class Submodule(IndexObject, TraversableIterableObj): return SubmoduleConfigParser(fp_module, read_only=read_only) def _clear_cache(self) -> None: - # clear the possibly changed values + """Clear the possibly changed values.""" for name in self._cache_attrs: try: delattr(self, name) @@ -245,19 +279,23 @@ class Submodule(IndexObject, TraversableIterableObj): # END for each name to delete @classmethod - def _sio_modules(cls, parent_commit: Commit_ish) -> BytesIO: - """:return: Configuration file as BytesIO - we only access it through the respective blob's data""" + def _sio_modules(cls, parent_commit: "Commit") -> BytesIO: + """ + :return: + Configuration file as :class:`~io.BytesIO` - we only access it through the + respective blob's data + """ sio = BytesIO(parent_commit.tree[cls.k_modules_file].data_stream.read()) sio.name = cls.k_modules_file return sio def _config_parser_constrained(self, read_only: bool) -> SectionConstraint: - """:return: Config Parser constrained to our submodule in read or write mode""" + """:return: Config parser constrained to our submodule in read or write mode""" try: - pc: Union["Commit_ish", None] = self.parent_commit + pc = self.parent_commit except ValueError: pc = None - # end handle empty parent repository + # END handle empty parent repository parser = self._config_parser(self.repo, pc, read_only) parser.set_submodule(self) return SectionConstraint(parser, sm_section(self.name)) @@ -269,7 +307,6 @@ class Submodule(IndexObject, TraversableIterableObj): if parent_repo.working_tree_dir: return osp.join(parent_repo.working_tree_dir, path) raise NotADirectoryError() - # end @classmethod def _clone_repo( @@ -282,14 +319,31 @@ class Submodule(IndexObject, TraversableIterableObj): 
allow_unsafe_protocols: bool = False, **kwargs: Any, ) -> "Repo": - """:return: Repo instance of newly cloned repository - :param repo: our parent repository - :param url: url to clone from - :param path: repository - relative path to the submodule checkout location - :param name: canonical of the submodule - :param allow_unsafe_protocols: Allow unsafe protocols to be used, like ext - :param allow_unsafe_options: Allow unsafe options to be used, like --upload-pack - :param kwargs: additional arguments given to git.clone""" + """ + :return: + :class:`~git.repo.base.Repo` instance of newly cloned repository. + + :param repo: + Our parent repository. + + :param url: + URL to clone from. + + :param path: + Repository-relative path to the submodule checkout location. + + :param name: + Canonical name of the submodule. + + :param allow_unsafe_protocols: + Allow unsafe protocols to be used, like ``ext``. + + :param allow_unsafe_options: + Allow unsafe options to be used, like ``--upload-pack``. + + :param kwargs: + Additional arguments given to :manpage:`git-clone(1)`. 
+ """ module_abspath = cls._module_abspath(repo, path, name) module_checkout_path = module_abspath if cls._need_gitfile_submodules(repo.git): @@ -298,7 +352,6 @@ class Submodule(IndexObject, TraversableIterableObj): if not osp.isdir(module_abspath_dir): os.makedirs(module_abspath_dir) module_checkout_path = osp.join(str(repo.working_tree_dir), path) - # end clone = git.Repo.clone_from( url, @@ -309,13 +362,16 @@ class Submodule(IndexObject, TraversableIterableObj): ) if cls._need_gitfile_submodules(repo.git): cls._write_git_file_and_module_config(module_checkout_path, module_abspath) - # end + return clone @classmethod def _to_relative_path(cls, parent_repo: "Repo", path: PathLike) -> PathLike: - """:return: a path guaranteed to be relative to the given parent - repository - :raise ValueError: if path is not contained in the parent repository's working tree""" + """:return: A path guaranteed to be relative to the given parent repository + + :raise ValueError: + If path is not contained in the parent repository's working tree. + """ path = to_native_path_linux(path) if path.endswith("/"): path = path[:-1] @@ -331,27 +387,38 @@ class Submodule(IndexObject, TraversableIterableObj): path = path[len(working_tree_linux.rstrip("/")) + 1 :] if not path: raise ValueError("Absolute submodule path '%s' didn't yield a valid relative path" % path) - # end verify converted relative path makes sense - # end convert to a relative path + # END verify converted relative path makes sense + # END convert to a relative path return path @classmethod def _write_git_file_and_module_config(cls, working_tree_dir: PathLike, module_abspath: PathLike) -> None: - """Writes a .git file containing a(preferably) relative path to the actual git module repository. - It is an error if the module_abspath cannot be made into a relative path, relative to the working_tree_dir - :note: will overwrite existing files ! 
- :note: as we rewrite both the git file as well as the module configuration, we might fail on the configuration - and will not roll back changes done to the git file. This should be a non - issue, but may easily be fixed - if it becomes one - :param working_tree_dir: directory to write the .git file into - :param module_abspath: absolute path to the bare repository + """Write a ``.git`` file containing a (preferably) relative path to the actual + git module repository. + + It is an error if the `module_abspath` cannot be made into a relative path, + relative to the `working_tree_dir`. + + :note: + This will overwrite existing files! + + :note: + As we rewrite both the git file as well as the module configuration, we + might fail on the configuration and will not roll back changes done to the + git file. This should be a non-issue, but may easily be fixed if it becomes + one. + + :param working_tree_dir: + Directory to write the ``.git`` file into. + + :param module_abspath: + Absolute path to the bare repository. """ git_file = osp.join(working_tree_dir, ".git") rela_path = osp.relpath(module_abspath, start=working_tree_dir) - if is_win: - if osp.isfile(git_file): - os.remove(git_file) + if sys.platform == "win32" and osp.isfile(git_file): + os.remove(git_file) with open(git_file, "wb") as fp: fp.write(("gitdir: %s" % rela_path).encode(defenc)) @@ -379,57 +446,82 @@ class Submodule(IndexObject, TraversableIterableObj): allow_unsafe_options: bool = False, allow_unsafe_protocols: bool = False, ) -> "Submodule": - """Add a new submodule to the given repository. This will alter the index - as well as the .gitmodules file, but will not create a new commit. - If the submodule already exists, no matter if the configuration differs - from the one provided, the existing submodule will be returned. 
- - :param repo: Repository instance which should receive the submodule - :param name: The name/identifier for the submodule - :param path: repository-relative or absolute path at which the submodule - should be located + """Add a new submodule to the given repository. This will alter the index as + well as the ``.gitmodules`` file, but will not create a new commit. If the + submodule already exists, no matter if the configuration differs from the one + provided, the existing submodule will be returned. + + :param repo: + Repository instance which should receive the submodule. + + :param name: + The name/identifier for the submodule. + + :param path: + Repository-relative or absolute path at which the submodule should be + located. It will be created as required during the repository initialization. - :param url: git-clone compatible URL, see git-clone reference for more information - If None, the repository is assumed to exist, and the url of the first - remote is taken instead. This is useful if you want to make an existing - repository a submodule of anotherone. - :param branch: name of branch at which the submodule should (later) be checked out. - The given branch must exist in the remote repository, and will be checked - out locally as a tracking branch. - It will only be written into the configuration if it not None, which is + + :param url: + ``git clone ...``-compatible URL. See :manpage:`git-clone(1)` for more + information. If ``None``, the repository is assumed to exist, and the URL of + the first remote is taken instead. This is useful if you want to make an + existing repository a submodule of another one. + + :param branch: + Name of branch at which the submodule should (later) be checked out. The + given branch must exist in the remote repository, and will be checked out + locally as a tracking branch. + It will only be written into the configuration if it not ``None``, which is when the checked out branch will be the one the remote HEAD pointed to. 
- The result you get in these situation is somewhat fuzzy, and it is recommended - to specify at least 'master' here. - Examples are 'master' or 'feature/new' - :param no_checkout: if True, and if the repository has to be cloned manually, - no checkout will be performed - :param depth: Create a shallow clone with a history truncated to the - specified number of commits. - :param env: Optional dictionary containing the desired environment variables. - Note: Provided variables will be used to update the execution - environment for `git`. If some variable is not specified in `env` - and is defined in `os.environ`, value from `os.environ` will be used. - If you want to unset some variable, consider providing empty string - as its value. - :param clone_multi_options: A list of Clone options. Please see ``git.repo.base.Repo.clone`` - for details. - :param allow_unsafe_protocols: Allow unsafe protocols to be used, like ext - :param allow_unsafe_options: Allow unsafe options to be used, like --upload-pack - :return: The newly created submodule instance - :note: works atomically, such that no change will be done if the repository - update fails for instance""" + The result you get in these situation is somewhat fuzzy, and it is + recommended to specify at least ``master`` here. + Examples are ``master`` or ``feature/new``. + + :param no_checkout: + If ``True``, and if the repository has to be cloned manually, no checkout + will be performed. + :param depth: + Create a shallow clone with a history truncated to the specified number of + commits. + + :param env: + Optional dictionary containing the desired environment variables. + + Note: Provided variables will be used to update the execution environment + for ``git``. If some variable is not specified in `env` and is defined in + attr:`os.environ`, the value from attr:`os.environ` will be used. If you + want to unset some variable, consider providing an empty string as its + value. 
+ + :param clone_multi_options: + A list of clone options. Please see + :meth:`Repo.clone <git.repo.base.Repo.clone>` for details. + + :param allow_unsafe_protocols: + Allow unsafe protocols to be used, like ``ext``. + + :param allow_unsafe_options: + Allow unsafe options to be used, like ``--upload-pack``. + + :return: + The newly created :class:`Submodule` instance. + + :note: + Works atomically, such that no change will be done if, for example, the + repository update fails. + """ if repo.bare: raise InvalidGitRepositoryError("Cannot add submodules to bare repositories") # END handle bare repos path = cls._to_relative_path(repo, path) - # assure we never put backslashes into the url, as some operating systems - # like it ... + # Ensure we never put backslashes into the URL, as might happen on Windows. if url is not None: url = to_native_path_linux(url) - # END assure url correctness + # END ensure URL correctness # INSTANTIATE INTERMEDIATE SM sm = cls( @@ -441,13 +533,13 @@ class Submodule(IndexObject, TraversableIterableObj): url="invalid-temporary", ) if sm.exists(): - # reretrieve submodule from tree + # Reretrieve submodule from tree. try: sm = repo.head.commit.tree[str(path)] sm._name = name return sm except KeyError: - # could only be in index + # Could only be in index. index = repo.index entry = index.entries[index.entry_key(path, 0)] sm.binsha = entry.binsha @@ -455,7 +547,7 @@ class Submodule(IndexObject, TraversableIterableObj): # END handle exceptions # END handle existing - # fake-repo - we only need the functionality on the branch instance + # fake-repo - we only need the functionality on the branch instance. br = git.Head(repo, git.Head.to_full_path(str(branch) or cls.k_head_default)) has_module = sm.module_exists() branch_is_default = branch is None @@ -481,7 +573,7 @@ class Submodule(IndexObject, TraversableIterableObj): # END verify we have url url = urls[0] else: - # clone new repo + # Clone new repo. 
kwargs: Dict[str, Union[bool, int, str, Sequence[TBD]]] = {"n": no_checkout} if not branch_is_default: kwargs["b"] = br.name @@ -508,11 +600,11 @@ class Submodule(IndexObject, TraversableIterableObj): ) # END verify url - ## See #525 for ensuring git urls in config-files valid under Windows. + ## See #525 for ensuring git URLs in config-files are valid under Windows. url = Git.polish_url(url) # It's important to add the URL to the parent config, to let `git submodule` know. - # otherwise there is a '-' character in front of the submodule listing + # Otherwise there is a '-' character in front of the submodule listing: # a38efa84daef914e4de58d1905a500d8d14aaf45 mymodule (v0.9.0-1-ga38efa8) # -a38efa84daef914e4de58d1905a500d8d14aaf45 submodules/intermediate/one writer: Union[GitConfigParser, SectionConstraint] @@ -520,7 +612,7 @@ class Submodule(IndexObject, TraversableIterableObj): with sm.repo.config_writer() as writer: writer.set_value(sm_section(name), "url", url) - # update configuration and index + # Update configuration and index. index = sm.repo.index with sm.config_writer(index=index, write=False) as writer: writer.set_value("url", url) @@ -528,11 +620,11 @@ class Submodule(IndexObject, TraversableIterableObj): sm._url = url if not branch_is_default: - # store full path + # Store full path. writer.set_value(cls.k_head_option, br.path) sm._branch_path = br.path - # we deliberately assume that our head matches our index ! + # We deliberately assume that our head matches our index! if mrepo: sm.binsha = mrepo.head.commit.binsha index.add([sm], write=True) @@ -553,43 +645,76 @@ class Submodule(IndexObject, TraversableIterableObj): allow_unsafe_options: bool = False, allow_unsafe_protocols: bool = False, ) -> "Submodule": - """Update the repository of this submodule to point to the checkout - we point at with the binsha of this instance. - - :param recursive: if True, we will operate recursively and update child- - modules as well. 
- :param init: if True, the module repository will be cloned into place if necessary - :param to_latest_revision: if True, the submodule's sha will be ignored during checkout. - Instead, the remote will be fetched, and the local tracking branch updated. - This only works if we have a local tracking branch, which is the case - if the remote repository had a master branch, or of the 'branch' option - was specified for this submodule and the branch existed remotely - :param progress: UpdateProgress instance or None if no progress should be shown - :param dry_run: if True, the operation will only be simulated, but not performed. - All performed operations are read - only + """Update the repository of this submodule to point to the checkout we point at + with the binsha of this instance. + + :param recursive: + If ``True``, we will operate recursively and update child modules as well. + + :param init: + If ``True``, the module repository will be cloned into place if necessary. + + :param to_latest_revision: + If ``True``, the submodule's sha will be ignored during checkout. Instead, + the remote will be fetched, and the local tracking branch updated. This only + works if we have a local tracking branch, which is the case if the remote + repository had a master branch, or if the ``branch`` option was specified + for this submodule and the branch existed remotely. + + :param progress: + :class:`UpdateProgress` instance, or ``None`` if no progress should be + shown. + + :param dry_run: + If ``True``, the operation will only be simulated, but not performed. + All performed operations are read-only. + :param force: - If True, we may reset heads even if the repository in question is dirty. Additinoally we will be allowed - to set a tracking branch which is ahead of its remote branch back into the past or the location of the - remote branch. This will essentially 'forget' commits. 
- If False, local tracking branches that are in the future of their respective remote branches will simply - not be moved. - :param keep_going: if True, we will ignore but log all errors, and keep going recursively. - Unless dry_run is set as well, keep_going could cause subsequent / inherited errors you wouldn't see - otherwise. - In conjunction with dry_run, it can be useful to anticipate all errors when updating submodules - :param env: Optional dictionary containing the desired environment variables. - Note: Provided variables will be used to update the execution - environment for `git`. If some variable is not specified in `env` - and is defined in `os.environ`, value from `os.environ` will be used. - If you want to unset some variable, consider providing empty string - as its value. - :param clone_multi_options: list of Clone options. Please see ``git.repo.base.Repo.clone`` - for details. Only take effect with `init` option. - :param allow_unsafe_protocols: Allow unsafe protocols to be used, like ext - :param allow_unsafe_options: Allow unsafe options to be used, like --upload-pack - :note: does nothing in bare repositories - :note: method is definitely not atomic if recurisve is True - :return: self""" + If ``True``, we may reset heads even if the repository in question is dirty. + Additionally we will be allowed to set a tracking branch which is ahead of + its remote branch back into the past or the location of the remote branch. + This will essentially 'forget' commits. + + If ``False``, local tracking branches that are in the future of their + respective remote branches will simply not be moved. + + :param keep_going: + If ``True``, we will ignore but log all errors, and keep going recursively. + Unless `dry_run` is set as well, `keep_going` could cause + subsequent/inherited errors you wouldn't see otherwise. + In conjunction with `dry_run`, it can be useful to anticipate all errors + when updating submodules. 
+ + :param env: + Optional dictionary containing the desired environment variables. + + Note: Provided variables will be used to update the execution environment + for ``git``. If some variable is not specified in `env` and is defined in + attr:`os.environ`, value from attr:`os.environ` will be used. + + If you want to unset some variable, consider providing the empty string as + its value. + + :param clone_multi_options: + List of :manpage:`git-clone(1)` options. + Please see :meth:`Repo.clone <git.repo.base.Repo.clone>` for details. + They only take effect with the `init` option. + + :param allow_unsafe_protocols: + Allow unsafe protocols to be used, like ``ext``. + + :param allow_unsafe_options: + Allow unsafe options to be used, like ``--upload-pack``. + + :note: + Does nothing in bare repositories. + + :note: + This method is definitely not atomic if `recursive` is ``True``. + + :return: + self + """ if self.repo.bare: return self # END pass in bare mode @@ -602,14 +727,14 @@ class Submodule(IndexObject, TraversableIterableObj): prefix = "DRY-RUN: " # END handle prefix - # to keep things plausible in dry-run mode + # To keep things plausible in dry-run mode. if dry_run: mrepo = None # END init mrepo try: - # ASSURE REPO IS PRESENT AND UPTODATE - ##################################### + # ENSURE REPO IS PRESENT AND UP-TO-DATE + ####################################### try: mrepo = self.module() rmts = mrepo.remotes @@ -647,7 +772,7 @@ class Submodule(IndexObject, TraversableIterableObj): return self # END early abort if init is not allowed - # there is no git-repository yet - but delete empty paths + # There is no git-repository yet - but delete empty paths. 
checkout_module_abspath = self.abspath if not dry_run and osp.isdir(checkout_module_abspath): try: @@ -659,8 +784,8 @@ class Submodule(IndexObject, TraversableIterableObj): # END handle OSError # END handle directory removal - # don't check it out at first - nonetheless it will create a local - # branch according to the remote-HEAD if possible + # Don't check it out at first - nonetheless it will create a local + # branch according to the remote-HEAD if possible. progress.update( BEGIN | CLONE, 0, @@ -689,43 +814,45 @@ class Submodule(IndexObject, TraversableIterableObj): ) if not dry_run: - # see whether we have a valid branch to checkout + # See whether we have a valid branch to check out. try: mrepo = cast("Repo", mrepo) - # find a remote which has our branch - we try to be flexible + # Find a remote which has our branch - we try to be flexible. remote_branch = find_first_remote_branch(mrepo.remotes, self.branch_name) local_branch = mkhead(mrepo, self.branch_path) - # have a valid branch, but no checkout - make sure we can figure - # that out by marking the commit with a null_sha + # Have a valid branch, but no checkout - make sure we can figure + # that out by marking the commit with a null_sha. local_branch.set_object(Object(mrepo, self.NULL_BIN_SHA)) # END initial checkout + branch creation - # make sure HEAD is not detached + # Make sure HEAD is not detached. 
mrepo.head.set_reference( local_branch, logmsg="submodule: attaching head to %s" % local_branch, ) mrepo.head.reference.set_tracking_branch(remote_branch) except (IndexError, InvalidGitRepositoryError): - log.warning("Failed to checkout tracking branch %s", self.branch_path) + _logger.warning("Failed to checkout tracking branch %s", self.branch_path) # END handle tracking branch - # NOTE: Have to write the repo config file as well, otherwise - # the default implementation will be offended and not update the repository - # Maybe this is a good way to assure it doesn't get into our way, but - # we want to stay backwards compatible too ... . Its so redundant ! + # NOTE: Have to write the repo config file as well, otherwise the + # default implementation will be offended and not update the + # repository. Maybe this is a good way to ensure it doesn't get into + # our way, but we want to stay backwards compatible too... It's so + # redundant! with self.repo.config_writer() as writer: writer.set_value(sm_section(self.name), "url", self.url) # END handle dry_run # END handle initialization - # DETERMINE SHAS TO CHECKOUT - ############################ + # DETERMINE SHAS TO CHECK OUT + ############################# binsha = self.binsha hexsha = self.hexsha if mrepo is not None: - # mrepo is only set if we are not in dry-run mode or if the module existed + # mrepo is only set if we are not in dry-run mode or if the module + # existed. 
is_detached = mrepo.head.is_detached # END handle dry_run @@ -738,32 +865,34 @@ class Submodule(IndexObject, TraversableIterableObj): binsha = rcommit.binsha hexsha = rcommit.hexsha else: - log.error( + _logger.error( "%s a tracking branch was not set for local branch '%s'", msg_base, mrepo.head.reference, ) # END handle remote ref else: - log.error("%s there was no local tracking branch", msg_base) + _logger.error("%s there was no local tracking branch", msg_base) # END handle detached head # END handle to_latest_revision option - # update the working tree - # handles dry_run + # Update the working tree. + # Handles dry_run. if mrepo is not None and mrepo.head.commit.binsha != binsha: - # We must assure that our destination sha (the one to point to) is in the future of our current head. - # Otherwise, we will reset changes that might have been done on the submodule, but were not yet pushed - # We also handle the case that history has been rewritten, leaving no merge-base. In that case - # we behave conservatively, protecting possible changes the user had done + # We must ensure that our destination sha (the one to point to) is in + # the future of our current head. Otherwise, we will reset changes that + # might have been done on the submodule, but were not yet pushed. We + # also handle the case that history has been rewritten, leaving no + # merge-base. In that case we behave conservatively, protecting possible + # changes the user had done. 
may_reset = True if mrepo.head.commit.binsha != self.NULL_BIN_SHA: base_commit = mrepo.merge_base(mrepo.head.commit, hexsha) if len(base_commit) == 0 or (base_commit[0] is not None and base_commit[0].hexsha == hexsha): if force: msg = "Will force checkout or reset on local branch that is possibly in the future of" - msg += "the commit it will be checked out to, effectively 'forgetting' new commits" - log.debug(msg) + msg += " the commit it will be checked out to, effectively 'forgetting' new commits" + _logger.debug(msg) else: msg = "Skipping %s on branch '%s' of submodule repo '%s' as it contains un-pushed commits" msg %= ( @@ -771,17 +900,17 @@ class Submodule(IndexObject, TraversableIterableObj): mrepo.head, mrepo, ) - log.info(msg) + _logger.info(msg) may_reset = False - # end handle force - # end handle if we are in the future + # END handle force + # END handle if we are in the future if may_reset and not force and mrepo.is_dirty(index=True, working_tree=True, untracked_files=True): raise RepositoryDirtyError(mrepo, "Cannot reset a dirty repository") - # end handle force and dirty state - # end handle empty repo + # END handle force and dirty state + # END handle empty repo - # end verify future/past + # END verify future/past progress.update( BEGIN | UPDWKTREE, 0, @@ -792,15 +921,15 @@ class Submodule(IndexObject, TraversableIterableObj): if not dry_run and may_reset: if is_detached: - # NOTE: for now we force, the user is no supposed to change detached - # submodules anyway. Maybe at some point this becomes an option, to - # properly handle user modifications - see below for future options - # regarding rebase and merge. + # NOTE: For now we force. The user is not supposed to change + # detached submodules anyway. Maybe at some point this becomes + # an option, to properly handle user modifications - see below + # for future options regarding rebase and merge. 
mrepo.git.checkout(hexsha, force=force) else: mrepo.head.reset(hexsha, index=True, working_tree=True) # END handle checkout - # if we may reset/checkout + # If we may reset/checkout. progress.update( END | UPDWKTREE, 0, @@ -811,13 +940,13 @@ class Submodule(IndexObject, TraversableIterableObj): except Exception as err: if not keep_going: raise - log.error(str(err)) - # end handle keep_going + _logger.error(str(err)) + # END handle keep_going # HANDLE RECURSION ################## if recursive: - # in dry_run mode, the module might not exist + # In dry_run mode, the module might not exist. if mrepo is not None: for submodule in self.iter_items(self.module()): submodule.update( @@ -841,19 +970,30 @@ class Submodule(IndexObject, TraversableIterableObj): the repository at our current path, changing the configuration, as well as adjusting our index entry accordingly. - :param module_path: the path to which to move our module in the parent repostory's working tree, - given as repository - relative or absolute path. Intermediate directories will be created - accordingly. If the path already exists, it must be empty. - Trailing(back)slashes are removed automatically - :param configuration: if True, the configuration will be adjusted to let - the submodule point to the given path. - :param module: if True, the repository managed by this submodule - will be moved as well. If False, we don't move the submodule's checkout, which may leave - the parent repository in an inconsistent state. - :return: self - :raise ValueError: if the module path existed and was not empty, or was a file - :note: Currently the method is not atomic, and it could leave the repository - in an inconsistent state if a sub - step fails for some reason + :param module_path: + The path to which to move our module in the parent repository's working + tree, given as repository-relative or absolute path. Intermediate + directories will be created accordingly. If the path already exists, it must + be empty. 
Trailing (back)slashes are removed automatically. + + :param configuration: + If ``True``, the configuration will be adjusted to let the submodule point + to the given path. + + :param module: + If ``True``, the repository managed by this submodule will be moved as well. + If ``False``, we don't move the submodule's checkout, which may leave the + parent repository in an inconsistent state. + + :return: + self + + :raise ValueError: + If the module path existed and was not empty, or was a file. + + :note: + Currently the method is not atomic, and it could leave the repository in an + inconsistent state if a sub-step fails for some reason. """ if module + configuration < 1: raise ValueError("You must specify to move at least the module or the configuration of the submodule") @@ -878,7 +1018,7 @@ class Submodule(IndexObject, TraversableIterableObj): raise ValueError("Index entry for target path did already exist") # END handle index key already there - # remove existing destination + # Remove existing destination. if module: if osp.exists(module_checkout_abspath): if len(os.listdir(module_checkout_abspath)): @@ -891,13 +1031,13 @@ class Submodule(IndexObject, TraversableIterableObj): os.rmdir(module_checkout_abspath) # END handle link else: - # recreate parent directories - # NOTE: renames() does that now + # Recreate parent directories. + # NOTE: renames() does that now. pass # END handle existence # END handle module - # move the module into place if possible + # Move the module into place if possible. 
cur_path = self.abspath renamed_module = False if module and osp.exists(cur_path): @@ -907,11 +1047,11 @@ class Submodule(IndexObject, TraversableIterableObj): if osp.isfile(osp.join(module_checkout_abspath, ".git")): module_abspath = self._module_abspath(self.repo, self.path, self.name) self._write_git_file_and_module_config(module_checkout_abspath, module_abspath) - # end handle git file rewrite + # END handle git file rewrite # END move physical module - # rename the index entry - have to manipulate the index directly as - # git-mv cannot be used on submodules ... yeah + # Rename the index entry - we have to manipulate the index directly as git-mv + # cannot be used on submodules... yeah. previous_sm_path = self.path try: if configuration: @@ -925,8 +1065,8 @@ class Submodule(IndexObject, TraversableIterableObj): raise InvalidGitRepositoryError("Submodule's entry at %r did not exist" % (self.path)) from e # END handle submodule doesn't exist - # update configuration - with self.config_writer(index=index) as writer: # auto-write + # Update configuration. + with self.config_writer(index=index) as writer: # Auto-write. writer.set_value("path", module_checkout_path) self.path = module_checkout_path # END handle configuration flag @@ -937,10 +1077,10 @@ class Submodule(IndexObject, TraversableIterableObj): raise # END handle undo rename - # Auto-rename submodule if it's name was 'default', that is, the checkout directory + # Auto-rename submodule if its name was 'default', that is, the checkout + # directory. if previous_sm_path == self.name: self.rename(module_checkout_path) - # end return self @@ -953,47 +1093,65 @@ class Submodule(IndexObject, TraversableIterableObj): dry_run: bool = False, ) -> "Submodule": """Remove this submodule from the repository. This will remove our entry - from the .gitmodules file and the entry in the .git / config file. - - :param module: If True, the module checkout we point to will be deleted - as well. 
If the module is currently on a commit which is not part - of any branch in the remote, if the currently checked out branch - working tree, or untracked files, - is ahead of its tracking branch, if you have modifications in the - In case the removal of the repository fails for these reasons, the - submodule status will not have been altered. - If this submodule has child - modules on its own, these will be deleted - prior to touching the own module. - :param force: Enforces the deletion of the module even though it contains - modifications. This basically enforces a brute - force file system based - deletion. - :param configuration: if True, the submodule is deleted from the configuration, - otherwise it isn't. Although this should be enabled most of the times, - this flag enables you to safely delete the repository of your submodule. - :param dry_run: if True, we will not actually do anything, but throw the errors - we would usually throw - :return: self - :note: doesn't work in bare repositories - :note: doesn't work atomically, as failure to remove any part of the submodule will leave - an inconsistent state - :raise InvalidGitRepositoryError: thrown if the repository cannot be deleted - :raise OSError: if directories or files could not be removed""" + from the ``.gitmodules`` file and the entry in the ``.git/config`` file. + + :param module: + If ``True``, the checked out module we point to will be deleted as well. If + that module is currently on a commit outside any branch in the remote, or if + it is ahead of its tracking branch, or if there are modified or untracked + files in its working tree, then the removal will fail. In case the removal + of the repository fails for these reasons, the submodule status will not + have been altered. + + If this submodule has child modules of its own, these will be deleted prior + to touching the direct submodule. + + :param force: + Enforces the deletion of the module even though it contains modifications. 
+ This basically enforces a brute-force file system based deletion. + + :param configuration: + If ``True``, the submodule is deleted from the configuration, otherwise it + isn't. Although this should be enabled most of the time, this flag enables + you to safely delete the repository of your submodule. + + :param dry_run: + If ``True``, we will not actually do anything, but throw the errors we would + usually throw. + + :return: + self + + :note: + Doesn't work in bare repositories. + + :note: + Doesn't work atomically, as failure to remove any part of the submodule will + leave an inconsistent state. + + :raise git.exc.InvalidGitRepositoryError: + Thrown if the repository cannot be deleted. + + :raise OSError: + If directories or files could not be removed. + """ if not (module or configuration): raise ValueError("Need to specify to delete at least the module, or the configuration") # END handle parameters - # Recursively remove children of this submodule + # Recursively remove children of this submodule. nc = 0 for csm in self.children(): nc += 1 csm.remove(module, force, configuration, dry_run) del csm - # end + if configuration and not dry_run and nc > 0: - # Assure we don't leave the parent repository in a dirty state, and commit our changes - # It's important for recursive, unforced, deletions to work as expected + # Ensure we don't leave the parent repository in a dirty state, and commit + # our changes. It's important for recursive, unforced, deletions to work as + # expected. self.module().index.commit("Removed at least one of child-modules of '%s'" % self.name) - # end handle recursion + # END handle recursion # DELETE REPOSITORY WORKING TREE ################################ @@ -1001,9 +1159,10 @@ class Submodule(IndexObject, TraversableIterableObj): mod = self.module() git_dir = mod.git_dir if force: - # take the fast lane and just delete everything in our module path - # TODO: If we run into permission problems, we have a highly inconsistent - # state. 
Delete the .git folders last, start with the submodules first + # Take the fast lane and just delete everything in our module path. + # TODO: If we run into permission problems, we have a highly + # inconsistent state. Delete the .git folders last, start with the + # submodules first. mp = self.abspath method: Union[None, Callable[[PathLike], None]] = None if osp.islink(mp): @@ -1018,7 +1177,7 @@ class Submodule(IndexObject, TraversableIterableObj): method(mp) # END apply deletion method else: - # verify we may delete our module + # Verify we may delete our module. if mod.is_dirty(index=True, working_tree=True, untracked_files=True): raise InvalidGitRepositoryError( "Cannot delete module at %s with any modifications, unless force is specified" @@ -1026,25 +1185,27 @@ class Submodule(IndexObject, TraversableIterableObj): ) # END check for dirt - # figure out whether we have new commits compared to the remotes - # NOTE: If the user pulled all the time, the remote heads might - # not have been updated, so commits coming from the remote look - # as if they come from us. But we stay strictly read-only and - # don't fetch beforehand. + # Figure out whether we have new commits compared to the remotes. + # NOTE: If the user pulled all the time, the remote heads might not have + # been updated, so commits coming from the remote look as if they come + # from us. But we stay strictly read-only and don't fetch beforehand. for remote in mod.remotes: num_branches_with_new_commits = 0 rrefs = remote.refs for rref in rrefs: num_branches_with_new_commits += len(mod.git.cherry(rref)) != 0 # END for each remote ref - # not a single remote branch contained all our commits + # Not a single remote branch contained all our commits. 
if len(rrefs) and num_branches_with_new_commits == len(rrefs): raise InvalidGitRepositoryError( "Cannot delete module at %s as there are new commits" % mod.working_tree_dir ) # END handle new commits - # have to manually delete references as python's scoping is - # not existing, they could keep handles open ( on windows this is a problem ) + # We have to manually delete some references to allow resources to + # be cleaned up immediately when we are done with them, because + # Python's scoping is no more granular than the whole function (loop + # bodies are not scopes). When the objects stay alive longer, they + # can keep handles open. On Windows, this is a problem. if len(rrefs): del rref # skipcq: PYL-W0631 # END handle remotes @@ -1052,47 +1213,30 @@ class Submodule(IndexObject, TraversableIterableObj): del remote # END for each remote - # finally delete our own submodule + # Finally delete our own submodule. if not dry_run: self._clear_cache() wtd = mod.working_tree_dir - del mod # release file-handles (windows) - import gc - + del mod # Release file-handles (Windows). gc.collect() - try: - rmtree(str(wtd)) - except Exception as ex: - if HIDE_WINDOWS_KNOWN_ERRORS: - from unittest import SkipTest - - raise SkipTest("FIXME: fails with: PermissionError\n {}".format(ex)) from ex - raise + rmtree(str(wtd)) # END delete tree if possible # END handle force if not dry_run and osp.isdir(git_dir): self._clear_cache() - try: - rmtree(git_dir) - except Exception as ex: - if HIDE_WINDOWS_KNOWN_ERRORS: - from unittest import SkipTest - - raise SkipTest(f"FIXME: fails with: PermissionError\n {ex}") from ex - else: - raise - # end handle separate bare repository + rmtree(git_dir) + # END handle separate bare repository # END handle module deletion - # void our data not to delay invalid access + # Void our data so as not to delay invalid access. 
if not dry_run: self._clear_cache() # DELETE CONFIGURATION ###################### if configuration and not dry_run: - # first the index-entry + # First the index-entry. parent_index = self.repo.index try: del parent_index.entries[parent_index.entry_key(self.path, 0)] @@ -1101,8 +1245,8 @@ class Submodule(IndexObject, TraversableIterableObj): # END delete entry parent_index.write() - # now git config - need the config intact, otherwise we can't query - # information anymore + # Now git config - we need the config intact, otherwise we can't query + # information anymore. with self.repo.config_writer() as gcp_writer: gcp_writer.remove_section(sm_section(self.name)) @@ -1113,24 +1257,31 @@ class Submodule(IndexObject, TraversableIterableObj): return self - def set_parent_commit(self, commit: Union[Commit_ish, None], check: bool = True) -> "Submodule": + def set_parent_commit(self, commit: Union[Commit_ish, str, None], check: bool = True) -> "Submodule": """Set this instance to use the given commit whose tree is supposed to - contain the .gitmodules blob. + contain the ``.gitmodules`` blob. :param commit: - Commit'ish reference pointing at the root_tree, or None to always point to the - most recent commit + Commit-ish reference pointing at the root tree, or ``None`` to always point + to the most recent commit. + :param check: - if True, relatively expensive checks will be performed to verify + If ``True``, relatively expensive checks will be performed to verify validity of the submodule. - :raise ValueError: if the commit's tree didn't contain the .gitmodules blob. + + :raise ValueError: + If the commit's tree didn't contain the ``.gitmodules`` blob. + :raise ValueError: - if the parent commit didn't store this submodule under the current path - :return: self""" + If the parent commit didn't store this submodule under the current path. 
+ + :return: + self + """ if commit is None: self._parent_commit = None return self - # end handle None + # END handle None pcommit = self.repo.commit(commit) pctree = pcommit.tree if self.k_modules_file not in pctree: @@ -1148,14 +1299,13 @@ class Submodule(IndexObject, TraversableIterableObj): # END handle submodule did not exist # END handle checking mode - # update our sha, it could have changed - # If check is False, we might see a parent-commit that doesn't even contain the submodule anymore. - # in that case, mark our sha as being NULL + # Update our sha, it could have changed. + # If check is False, we might see a parent-commit that doesn't even contain the + # submodule anymore. in that case, mark our sha as being NULL. try: self.binsha = pctree[str(self.path)].binsha except KeyError: self.binsha = self.NULL_BIN_SHA - # end self._clear_cache() return self @@ -1164,19 +1314,31 @@ class Submodule(IndexObject, TraversableIterableObj): def config_writer( self, index: Union["IndexFile", None] = None, write: bool = True ) -> SectionConstraint["SubmoduleConfigParser"]: - """:return: a config writer instance allowing you to read and write the data - belonging to this submodule into the .gitmodules file. - - :param index: if not None, an IndexFile instance which should be written. - defaults to the index of the Submodule's parent repository. - :param write: if True, the index will be written each time a configuration - value changes. - :note: the parameters allow for a more efficient writing of the index, - as you can pass in a modified index on your own, prevent automatic writing, - and write yourself once the whole operation is complete - :raise ValueError: if trying to get a writer on a parent_commit which does not - match the current head commit - :raise IOError: If the .gitmodules file/blob could not be read""" + """ + :return: + A config writer instance allowing you to read and write the data belonging + to this submodule into the ``.gitmodules`` file. 
+ + :param index: + If not ``None``, an :class:`~git.index.base.IndexFile` instance which should + be written. Defaults to the index of the :class:`Submodule`'s parent + repository. + + :param write: + If ``True``, the index will be written each time a configuration value changes. + + :note: + The parameters allow for a more efficient writing of the index, as you can + pass in a modified index on your own, prevent automatic writing, and write + yourself once the whole operation is complete. + + :raise ValueError: + If trying to get a writer on a parent_commit which does not match the + current head commit. + + :raise IOError: + If the ``.gitmodules`` file/blob could not be read. + """ writer = self._config_parser_constrained(read_only=False) if index is not None: writer.config._index = index @@ -1185,25 +1347,29 @@ class Submodule(IndexObject, TraversableIterableObj): @unbare_repo def rename(self, new_name: str) -> "Submodule": - """Rename this submodule - :note: This method takes care of renaming the submodule in various places, such as + """Rename this submodule. - * $parent_git_dir / config - * $working_tree_dir / .gitmodules + :note: + This method takes care of renaming the submodule in various places, such as: + + * ``$parent_git_dir / config`` + * ``$working_tree_dir / .gitmodules`` * (git >= v1.8.0: move submodule repository to new name) - As .gitmodules will be changed, you would need to make a commit afterwards. The changed .gitmodules file - will already be added to the index + As ``.gitmodules`` will be changed, you would need to make a commit afterwards. + The changed ``.gitmodules`` file will already be added to the index. - :return: this submodule instance + :return: + This :class:`Submodule` instance """ if self.name == new_name: return self # .git/config with self.repo.config_writer() as pw: - # As we ourselves didn't write anything about submodules into the parent .git/config, - # we will not require it to exist, and just ignore missing entries. 
+ # As we ourselves didn't write anything about submodules into the parent + # .git/config, we will not require it to exist, and just ignore missing + # entries. if pw.has_section(sm_section(self.name)): pw.rename_section(sm_section(self.name), sm_section(new_name)) @@ -1218,16 +1384,16 @@ class Submodule(IndexObject, TraversableIterableObj): if mod.has_separate_working_tree(): destination_module_abspath = self._module_abspath(self.repo, self.path, new_name) source_dir = mod.git_dir - # Let's be sure the submodule name is not so obviously tied to a directory + # Let's be sure the submodule name is not so obviously tied to a directory. if str(destination_module_abspath).startswith(str(mod.git_dir)): tmp_dir = self._module_abspath(self.repo, self.path, str(uuid.uuid4())) os.renames(source_dir, tmp_dir) source_dir = tmp_dir - # end handle self-containment + # END handle self-containment os.renames(source_dir, destination_module_abspath) if mod.working_tree_dir: self._write_git_file_and_module_config(mod.working_tree_dir, destination_module_abspath) - # end move separate git repository + # END move separate git repository return self @@ -1237,10 +1403,15 @@ class Submodule(IndexObject, TraversableIterableObj): @unbare_repo def module(self) -> "Repo": - """:return: Repo instance initialized from the repository at our submodule path - :raise InvalidGitRepositoryError: if a repository was not available. This could - also mean that it was not yet initialized""" - # late import to workaround circular dependencies + """ + :return: + :class:`~git.repo.base.Repo` instance initialized from the repository at our + submodule path + + :raise git.exc.InvalidGitRepositoryError: + If a repository was not available. + This could also mean that it was not yet initialized. 
+ """ module_checkout_abspath = self.abspath try: repo = git.Repo(module_checkout_abspath) @@ -1254,7 +1425,11 @@ class Submodule(IndexObject, TraversableIterableObj): # END handle exceptions def module_exists(self) -> bool: - """:return: True if our module exists and is a valid git repository. See module() method""" + """ + :return: + ``True`` if our module exists and is a valid git repository. + See the :meth:`module` method. + """ try: self.module() return True @@ -1264,11 +1439,13 @@ class Submodule(IndexObject, TraversableIterableObj): def exists(self) -> bool: """ - :return: True if the submodule exists, False otherwise. Please note that - a submodule may exist ( in the .gitmodules file) even though its module - doesn't exist on disk""" - # keep attributes for later, and restore them if we have no valid data - # this way we do not actually alter the state of the object + :return: + ``True`` if the submodule exists, ``False`` otherwise. + Please note that a submodule may exist (in the ``.gitmodules`` file) even + though its module doesn't exist on disk. + """ + # Keep attributes for later, and restore them if we have no valid data. + # This way we do not actually alter the state of the object. loc = locals() for attr in self._cache_attrs: try: @@ -1276,14 +1453,15 @@ class Submodule(IndexObject, TraversableIterableObj): loc[attr] = getattr(self, attr) # END if we have the attribute cache except (cp.NoSectionError, ValueError): - # on PY3, this can happen apparently ... don't know why this doesn't happen on PY2 + # On PY3, this can happen apparently... don't know why this doesn't + # happen on PY2. 
pass # END for each attr self._clear_cache() try: try: - self.path + self.path # noqa: B018 return True except Exception: return False @@ -1298,62 +1476,92 @@ class Submodule(IndexObject, TraversableIterableObj): @property def branch(self) -> "Head": - """:return: The branch instance that we are to checkout - :raise InvalidGitRepositoryError: if our module is not yet checked out""" + """ + :return: + The branch instance that we are to checkout + + :raise git.exc.InvalidGitRepositoryError: + If our module is not yet checked out. + """ return mkhead(self.module(), self._branch_path) @property def branch_path(self) -> PathLike: """ - :return: full(relative) path as string to the branch we would checkout - from the remote and track""" + :return: + Full repository-relative path as string to the branch we would checkout from + the remote and track + """ return self._branch_path @property def branch_name(self) -> str: - """:return: the name of the branch, which is the shortest possible branch name""" - # use an instance method, for this we create a temporary Head instance - # which uses a repository that is available at least ( it makes no difference ) + """ + :return: + The name of the branch, which is the shortest possible branch name + """ + # Use an instance method, for this we create a temporary Head instance which + # uses a repository that is available at least (it makes no difference). 
return git.Head(self.repo, self._branch_path).name @property def url(self) -> str: - """:return: The url to the repository which our module - repository refers to""" + """:return: The url to the repository our submodule's repository refers to""" return self._url @property - def parent_commit(self) -> "Commit_ish": - """:return: Commit instance with the tree containing the .gitmodules file - :note: will always point to the current head's commit if it was not set explicitly""" + def parent_commit(self) -> "Commit": + """ + :return: + :class:`~git.objects.commit.Commit` instance with the tree containing the + ``.gitmodules`` file + + :note: + Will always point to the current head's commit if it was not set explicitly. + """ if self._parent_commit is None: return self.repo.commit() return self._parent_commit @property def name(self) -> str: - """:return: The name of this submodule. It is used to identify it within the - .gitmodules file. - :note: by default, the name is the path at which to find the submodule, but - in git - python it should be a unique identifier similar to the identifiers - used for remotes, which allows to change the path of the submodule - easily + """ + :return: + The name of this submodule. It is used to identify it within the + ``.gitmodules`` file. + + :note: + By default, this is the name is the path at which to find the submodule, but + in GitPython it should be a unique identifier similar to the identifiers + used for remotes, which allows to change the path of the submodule easily. """ return self._name def config_reader(self) -> SectionConstraint[SubmoduleConfigParser]: """ - :return: ConfigReader instance which allows you to qurey the configuration values - of this submodule, as provided by the .gitmodules file - :note: The config reader will actually read the data directly from the repository + :return: + ConfigReader instance which allows you to query the configuration values of + this submodule, as provided by the ``.gitmodules`` file. 
+ + :note: + The config reader will actually read the data directly from the repository and thus does not need nor care about your working tree. - :note: Should be cached by the caller and only kept as long as needed - :raise IOError: If the .gitmodules file/blob could not be read""" + + :note: + Should be cached by the caller and only kept as long as needed. + + :raise IOError: + If the ``.gitmodules`` file/blob could not be read. + """ return self._config_parser_constrained(read_only=True) def children(self) -> IterableList["Submodule"]: """ - :return: IterableList(Submodule, ...) an iterable list of submodules instances - which are children of this submodule or 0 if the submodule is not checked out""" + :return: + IterableList(Submodule, ...) An iterable list of :class:`Submodule` + instances which are children of this submodule or 0 if the submodule is not + checked out. + """ return self._get_intermediate_items(self) # } END query interface @@ -1365,15 +1573,19 @@ class Submodule(IndexObject, TraversableIterableObj): cls, repo: "Repo", parent_commit: Union[Commit_ish, str] = "HEAD", - *Args: Any, + *args: Any, **kwargs: Any, ) -> Iterator["Submodule"]: - """:return: iterator yielding Submodule instances available in the given repository""" + """ + :return: + Iterator yielding :class:`Submodule` instances available in the given + repository + """ try: - pc = repo.commit(parent_commit) # parent commit instance + pc = repo.commit(parent_commit) # Parent commit instance parser = cls._config_parser(repo, pc, read_only=True) except (IOError, BadName): - return iter([]) + return # END handle empty iterator for sms in parser.sections(): @@ -1385,32 +1597,32 @@ class Submodule(IndexObject, TraversableIterableObj): b = str(parser.get(sms, cls.k_head_option)) # END handle optional information - # get the binsha + # Get the binsha. 
index = repo.index try: - rt = pc.tree # root tree + rt = pc.tree # Root tree sm = rt[p] except KeyError: - # try the index, maybe it was just added + # Try the index, maybe it was just added. try: entry = index.entries[index.entry_key(p, 0)] sm = Submodule(repo, entry.binsha, entry.mode, entry.path) except KeyError: - # The submodule doesn't exist, probably it wasn't - # removed from the .gitmodules file. + # The submodule doesn't exist, probably it wasn't removed from the + # .gitmodules file. continue # END handle keyerror # END handle critical error - # Make sure we are looking at a submodule object + # Make sure we are looking at a submodule object. if type(sm) is not git.objects.submodule.base.Submodule: continue - # fill in remaining info - saves time as it doesn't have to be parsed again + # Fill in remaining info - saves time as it doesn't have to be parsed again. sm._name = n if pc != repo.commit(): sm._parent_commit = pc - # end set only if not most recent ! + # END set only if not most recent! 
sm._branch_path = git.Head.to_full_path(b) sm._url = u diff --git a/git/objects/submodule/root.py b/git/objects/submodule/root.py index 0cbc262..d93193f 100644 --- a/git/objects/submodule/root.py +++ b/git/objects/submodule/root.py @@ -1,10 +1,16 @@ -from .base import Submodule, UpdateProgress -from .util import find_first_remote_branch -from git.exc import InvalidGitRepositoryError -import git +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ + +__all__ = ["RootModule", "RootUpdateProgress"] import logging +import git +from git.exc import InvalidGitRepositoryError + +from .base import Submodule, UpdateProgress +from .util import find_first_remote_branch + # typing ------------------------------------------------------------------- from typing import TYPE_CHECKING, Union @@ -17,14 +23,12 @@ if TYPE_CHECKING: # ---------------------------------------------------------------------------- -__all__ = ["RootModule", "RootUpdateProgress"] - -log = logging.getLogger("git.objects.submodule.root") -log.addHandler(logging.NullHandler()) +_logger = logging.getLogger(__name__) class RootUpdateProgress(UpdateProgress): - """Utility class which adds more opcodes to the UpdateProgress""" + """Utility class which adds more opcodes to + :class:`~git.objects.submodule.base.UpdateProgress`.""" REMOVE, PATHCHANGE, BRANCHCHANGE, URLCHANGE = [ 1 << x for x in range(UpdateProgress._num_op_codes, UpdateProgress._num_op_codes + 4) @@ -43,17 +47,19 @@ PATHCHANGE = RootUpdateProgress.PATHCHANGE class RootModule(Submodule): + """A (virtual) root of all submodules in the given repository. - """A (virtual) Root of all submodules in the given repository. It can be used - to more easily traverse all submodules of the master repository""" + This can be used to more easily traverse all submodules of the + superproject (master repository). 
+ """ __slots__ = () k_root_name = "__ROOT__" - def __init__(self, repo: "Repo"): + def __init__(self, repo: "Repo") -> None: # repo, binsha, mode=None, path=None, name = None, parent_commit=None, url=None, ref=None) - super(RootModule, self).__init__( + super().__init__( repo, binsha=self.NULL_BIN_SHA, mode=self.k_default_mode, @@ -65,14 +71,14 @@ class RootModule(Submodule): ) def _clear_cache(self) -> None: - """May not do anything""" + """May not do anything.""" pass # { Interface - def update( + def update( # type: ignore[override] self, - previous_commit: Union[Commit_ish, None] = None, # type: ignore[override] + previous_commit: Union[Commit_ish, str, None] = None, recursive: bool = True, force_remove: bool = False, init: bool = True, @@ -83,44 +89,73 @@ class RootModule(Submodule): keep_going: bool = False, ) -> "RootModule": """Update the submodules of this repository to the current HEAD commit. - This method behaves smartly by determining changes of the path of a submodules + + This method behaves smartly by determining changes of the path of a submodule's repository, next to changes to the to-be-checked-out commit or the branch to be - checked out. This works if the submodules ID does not change. - Additionally it will detect addition and removal of submodules, which will be handled - gracefully. - - :param previous_commit: If set to a commit'ish, the commit we should use - as the previous commit the HEAD pointed to before it was set to the commit it points to now. - If None, it defaults to HEAD@{1} otherwise - :param recursive: if True, the children of submodules will be updated as well - using the same technique - :param force_remove: If submodules have been deleted, they will be forcibly removed. - Otherwise the update may fail if a submodule's repository cannot be deleted as - changes have been made to it (see Submodule.update() for more information) - :param init: If we encounter a new module which would need to be initialized, then do it. 
- :param to_latest_revision: If True, instead of checking out the revision pointed to - by this submodule's sha, the checked out tracking branch will be merged with the + checked out. This works if the submodule's ID does not change. + + Additionally it will detect addition and removal of submodules, which will be + handled gracefully. + + :param previous_commit: + If set to a commit-ish, the commit we should use as the previous commit the + HEAD pointed to before it was set to the commit it points to now. + If ``None``, it defaults to ``HEAD@{1}`` otherwise. + + :param recursive: + If ``True``, the children of submodules will be updated as well using the + same technique. + + :param force_remove: + If submodules have been deleted, they will be forcibly removed. Otherwise + the update may fail if a submodule's repository cannot be deleted as changes + have been made to it. + (See :meth:`Submodule.update <git.objects.submodule.base.Submodule.update>` + for more information.) + + :param init: + If we encounter a new module which would need to be initialized, then do it. + + :param to_latest_revision: + If ``True``, instead of checking out the revision pointed to by this + submodule's sha, the checked out tracking branch will be merged with the latest remote branch fetched from the repository's origin. - Unless force_reset is specified, a local tracking branch will never be reset into its past, therefore - the remote branch must be in the future for this to have an effect. - :param force_reset: if True, submodules may checkout or reset their branch even if the repository has - pending changes that would be overwritten, or if the local tracking branch is in the future of the - remote tracking branch and would be reset into its past. - :param progress: RootUpdateProgress instance or None if no progress should be sent - :param dry_run: if True, operations will not actually be performed. 
Progress messages + + Unless `force_reset` is specified, a local tracking branch will never be + reset into its past, therefore the remote branch must be in the future for + this to have an effect. + + :param force_reset: + If ``True``, submodules may checkout or reset their branch even if the + repository has pending changes that would be overwritten, or if the local + tracking branch is in the future of the remote tracking branch and would be + reset into its past. + + :param progress: + :class:`RootUpdateProgress` instance, or ``None`` if no progress should be + sent. + + :param dry_run: + If ``True``, operations will not actually be performed. Progress messages will change accordingly to indicate the WOULD DO state of the operation. - :param keep_going: if True, we will ignore but log all errors, and keep going recursively. - Unless dry_run is set as well, keep_going could cause subsequent/inherited errors you wouldn't see - otherwise. - In conjunction with dry_run, it can be useful to anticipate all errors when updating submodules - :return: self""" + + :param keep_going: + If ``True``, we will ignore but log all errors, and keep going recursively. + Unless `dry_run` is set as well, `keep_going` could cause + subsequent/inherited errors you wouldn't see otherwise. + In conjunction with `dry_run`, this can be useful to anticipate all errors + when updating submodules. + + :return: + self + """ if self.repo.bare: raise InvalidGitRepositoryError("Cannot update submodules in bare repositories") # END handle bare if progress is None: progress = RootUpdateProgress() - # END assure progress is set + # END ensure progress is set prefix = "" if dry_run: @@ -139,11 +174,11 @@ class RootModule(Submodule): raise IndexError # END handle initial commit except IndexError: - # in new repositories, there is no previous commit + # In new repositories, there is no previous commit. 
previous_commit = cur_commit # END exception handling else: - previous_commit = repo.commit(previous_commit) # obtain commit object + previous_commit = repo.commit(previous_commit) # Obtain commit object. # END handle previous commit psms: "IterableList[Submodule]" = self.list_items(repo, parent_commit=previous_commit) @@ -162,8 +197,8 @@ class RootModule(Submodule): op |= BEGIN # END handle begin - # fake it into thinking its at the current commit to allow deletion - # of previous module. Trigger the cache to be updated before that + # Fake it into thinking its at the current commit to allow deletion + # of previous module. Trigger the cache to be updated before that. progress.update( op, i, @@ -186,7 +221,7 @@ class RootModule(Submodule): # HANDLE PATH RENAMES ##################### - # url changes + branch changes + # URL changes + branch changes. csms = spsms & ssms len_csms = len(csms) for i, csm in enumerate(csms): @@ -202,7 +237,7 @@ class RootModule(Submodule): len_csms, prefix + "Moving repository of submodule %r from %s to %s" % (sm.name, psm.abspath, sm.abspath), ) - # move the module to the new path + # Move the module to the new path. if not dry_run: psm.move(sm.path, module=True, configuration=False) # END handle dry_run @@ -218,14 +253,15 @@ class RootModule(Submodule): # HANDLE URL CHANGE ################### if sm.url != psm.url: - # Add the new remote, remove the old one - # This way, if the url just changes, the commits will not - # have to be re-retrieved + # Add the new remote, remove the old one. + # This way, if the url just changes, the commits will not have + # to be re-retrieved. nn = "__new_origin__" smm = sm.module() rmts = smm.remotes - # don't do anything if we already have the url we search in place + # Don't do anything if we already have the url we search in + # place. 
if len([r for r in rmts if r.url == sm.url]) == 0: progress.update( BEGIN | URLCHANGE, @@ -248,7 +284,7 @@ class RootModule(Submodule): ) # END head is not detached - # now delete the changed one + # Now delete the changed one. rmt_for_deletion = None for remote in rmts: if remote.url == psm.url: @@ -257,17 +293,17 @@ class RootModule(Submodule): # END if urls match # END for each remote - # if we didn't find a matching remote, but have exactly one, - # we can safely use this one + # If we didn't find a matching remote, but have exactly + # one, we can safely use this one. if rmt_for_deletion is None: if len(rmts) == 1: rmt_for_deletion = rmts[0] else: - # if we have not found any remote with the original url - # we may not have a name. This is a special case, - # and its okay to fail here - # Alternatively we could just generate a unique name and leave all - # existing ones in place + # If we have not found any remote with the + # original URL we may not have a name. This is a + # special case, and its okay to fail here. + # Alternatively we could just generate a unique + # name and leave all existing ones in place. raise InvalidGitRepositoryError( "Couldn't find original remote-repo at url %r" % psm.url ) @@ -277,19 +313,19 @@ class RootModule(Submodule): orig_name = rmt_for_deletion.name smm.delete_remote(rmt_for_deletion) # NOTE: Currently we leave tags from the deleted remotes - # as well as separate tracking branches in the possibly totally - # changed repository ( someone could have changed the url to - # another project ). At some point, one might want to clean - # it up, but the danger is high to remove stuff the user - # has added explicitly + # as well as separate tracking branches in the possibly + # totally changed repository (someone could have changed + # the url to another project). At some point, one might + # want to clean it up, but the danger is high to remove + # stuff the user has added explicitly. 
- # rename the new remote back to what it was + # Rename the new remote back to what it was. smr.rename(orig_name) - # early on, we verified that the our current tracking branch - # exists in the remote. Now we have to assure that the - # sha we point to is still contained in the new remote - # tracking branch. + # Early on, we verified that the our current tracking + # branch exists in the remote. Now we have to ensure + # that the sha we point to is still contained in the new + # remote tracking branch. smsha = sm.binsha found = False rref = smr.refs[self.branch_name] @@ -301,11 +337,12 @@ class RootModule(Submodule): # END for each commit if not found: - # adjust our internal binsha to use the one of the remote - # this way, it will be checked out in the next step - # This will change the submodule relative to us, so - # the user will be able to commit the change easily - log.warning( + # Adjust our internal binsha to use the one of the + # remote this way, it will be checked out in the + # next step. This will change the submodule relative + # to us, so the user will be able to commit the + # change easily. + _logger.warning( "Current sha %s was not contained in the tracking\ branch at the new remote, setting it the the remote's tracking branch", sm.hexsha, @@ -313,7 +350,8 @@ class RootModule(Submodule): sm.binsha = rref.commit.binsha # END reset binsha - # NOTE: All checkout is performed by the base implementation of update + # NOTE: All checkout is performed by the base + # implementation of update. # END handle dry_run progress.update( END | URLCHANGE, @@ -327,8 +365,8 @@ class RootModule(Submodule): # HANDLE PATH CHANGES ##################### if sm.branch_path != psm.branch_path: - # finally, create a new tracking branch which tracks the - # new remote branch + # Finally, create a new tracking branch which tracks the new + # remote branch. 
progress.update( BEGIN | BRANCHCHANGE, i, @@ -340,10 +378,11 @@ class RootModule(Submodule): if not dry_run: smm = sm.module() smmr = smm.remotes - # As the branch might not exist yet, we will have to fetch all remotes to be sure ... . + # As the branch might not exist yet, we will have to fetch + # all remotes to be sure... for remote in smmr: remote.fetch(progress=progress) - # end for each remote + # END for each remote try: tbr = git.Head.create( @@ -352,15 +391,17 @@ class RootModule(Submodule): logmsg="branch: Created from HEAD", ) except OSError: - # ... or reuse the existing one + # ...or reuse the existing one. tbr = git.Head(smm, sm.branch_path) - # END assure tracking branch exists + # END ensure tracking branch exists tbr.set_tracking_branch(find_first_remote_branch(smmr, sm.branch_name)) - # NOTE: All head-resetting is done in the base implementation of update - # but we will have to checkout the new branch here. As it still points to the currently - # checkout out commit, we don't do any harm. - # As we don't want to update working-tree or index, changing the ref is all there is to do + # NOTE: All head-resetting is done in the base + # implementation of update but we will have to checkout the + # new branch here. As it still points to the currently + # checked out commit, we don't do any harm. + # As we don't want to update working-tree or index, changing + # the ref is all there is to do. smm.head.reference = tbr # END handle dry_run @@ -376,13 +417,13 @@ class RootModule(Submodule): except Exception as err: if not keep_going: raise - log.error(str(err)) - # end handle keep_going + _logger.error(str(err)) + # END handle keep_going # FINALLY UPDATE ALL ACTUAL SUBMODULES ###################################### for sm in sms: - # update the submodule using the default method + # Update the submodule using the default method. 
sm.update( recursive=False, init=init, @@ -393,12 +434,12 @@ class RootModule(Submodule): keep_going=keep_going, ) - # update recursively depth first - question is which inconsistent - # state will be better in case it fails somewhere. Defective branch - # or defective depth. The RootSubmodule type will never process itself, - # which was done in the previous expression + # Update recursively depth first - question is which inconsistent state will + # be better in case it fails somewhere. Defective branch or defective depth. + # The RootSubmodule type will never process itself, which was done in the + # previous expression. if recursive: - # the module would exist by now if we are not in dry_run mode + # The module would exist by now if we are not in dry_run mode. if sm.module_exists(): type(self)(sm.module()).update( recursive=True, @@ -417,7 +458,7 @@ class RootModule(Submodule): return self def module(self) -> "Repo": - """:return: the actual repository containing the submodules""" + """:return: The actual repository containing the submodules""" return self.repo # } END interface diff --git a/git/objects/submodule/util.py b/git/objects/submodule/util.py index 56ce148..c021510 100644 --- a/git/objects/submodule/util.py +++ b/git/objects/submodule/util.py @@ -1,9 +1,20 @@ -import git -from git.exc import InvalidGitRepositoryError -from git.config import GitConfigParser +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ + +__all__ = [ + "sm_section", + "sm_name", + "mkhead", + "find_first_remote_branch", + "SubmoduleConfigParser", +] + from io import BytesIO import weakref +import git +from git.config import GitConfigParser +from git.exc import InvalidGitRepositoryError # typing ----------------------------------------------------------------------- @@ -12,32 +23,24 @@ from typing import Any, Sequence, TYPE_CHECKING, Union from git.types import PathLike if TYPE_CHECKING: - from .base 
import Submodule from weakref import ReferenceType - from git.repo import Repo - from git.refs import Head - from git import Remote - from git.refs import RemoteReference + from git.refs import Head, RemoteReference + from git.remote import Remote + from git.repo import Repo -__all__ = ( - "sm_section", - "sm_name", - "mkhead", - "find_first_remote_branch", - "SubmoduleConfigParser", -) + from .base import Submodule # { Utilities def sm_section(name: str) -> str: - """:return: section title used in .gitmodules configuration file""" + """:return: Section title used in ``.gitmodules`` configuration file""" return f'submodule "{name}"' def sm_name(section: str) -> str: - """:return: name of the submodule as parsed from the section name""" + """:return: Name of the submodule as parsed from the section name""" section = section.strip() return section[11:-1] @@ -48,7 +51,8 @@ def mkhead(repo: "Repo", path: PathLike) -> "Head": def find_first_remote_branch(remotes: Sequence["Remote"], branch_name: str) -> "RemoteReference": - """Find the remote branch matching the name of the given branch or raise InvalidGitRepositoryError""" + """Find the remote branch matching the name of the given branch or raise + :exc:`~git.exc.InvalidGitRepositoryError`.""" for remote in remotes: try: return remote.refs[branch_name] @@ -61,37 +65,36 @@ def find_first_remote_branch(remotes: Sequence["Remote"], branch_name: str) -> " # } END utilities - # { Classes class SubmoduleConfigParser(GitConfigParser): + """Catches calls to :meth:`~git.config.GitConfigParser.write`, and updates the + ``.gitmodules`` blob in the index with the new data, if we have written into a + stream. - """ - Catches calls to _write, and updates the .gitmodules blob in the index - with the new data, if we have written into a stream. Otherwise it will - add the local file to the index to make it correspond with the working tree. 
- Additionally, the cache must be cleared + Otherwise it would add the local file to the index to make it correspond with the + working tree. Additionally, the cache must be cleared. - Please note that no mutating method will work in bare mode + Please note that no mutating method will work in bare mode. """ def __init__(self, *args: Any, **kwargs: Any) -> None: self._smref: Union["ReferenceType[Submodule]", None] = None self._index = None self._auto_write = True - super(SubmoduleConfigParser, self).__init__(*args, **kwargs) + super().__init__(*args, **kwargs) # { Interface def set_submodule(self, submodule: "Submodule") -> None: - """Set this instance's submodule. It must be called before - the first write operation begins""" + """Set this instance's submodule. It must be called before the first write + operation begins.""" self._smref = weakref.ref(submodule) def flush_to_index(self) -> None: - """Flush changes in our configuration file to the index""" + """Flush changes in our configuration file to the index.""" assert self._smref is not None - # should always have a file here + # Should always have a file here. assert not isinstance(self._file_or_files, BytesIO) sm = self._smref() @@ -108,7 +111,7 @@ class SubmoduleConfigParser(GitConfigParser): # { Overridden Methods def write(self) -> None: # type: ignore[override] - rval: None = super(SubmoduleConfigParser, self).write() + rval: None = super().write() self.flush_to_index() return rval diff --git a/git/objects/tag.py b/git/objects/tag.py index 56fd05d..88671d3 100644 --- a/git/objects/tag.py +++ b/git/objects/tag.py @@ -1,33 +1,53 @@ -# objects.py # Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors # -# This module is part of GitPython and is released under -# the BSD License: https://opensource.org/license/bsd-3-clause/ -""" Module containing all object based types. 
""" +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ + +"""Provides an :class:`~git.objects.base.Object`-based type for annotated tags. + +This defines the :class:`TagObject` class, which represents annotated tags. +For lightweight tags, see the :mod:`git.refs.tag` module. +""" + +__all__ = ["TagObject"] + +import sys + +from git.compat import defenc +from git.util import Actor, hex_to_bin + from . import base from .util import get_object_type_by_name, parse_actor_and_date -from ..util import hex_to_bin -from ..compat import defenc + +# typing ---------------------------------------------- from typing import List, TYPE_CHECKING, Union -from git.types import Literal +if sys.version_info >= (3, 8): + from typing import Literal +else: + from typing_extensions import Literal if TYPE_CHECKING: from git.repo import Repo - from git.util import Actor - from .commit import Commit + from .blob import Blob + from .commit import Commit from .tree import Tree -__all__ = ("TagObject",) +# --------------------------------------------------- class TagObject(base.Object): + """Annotated (i.e. non-lightweight) tag carrying additional information about an + object we are pointing to. - """Non-Lightweight tag carrying additional information about an object we are pointing to.""" + See :manpage:`gitglossary(7)` on "tag object": + https://git-scm.com/docs/gitglossary#def_tag_object + """ type: Literal["tag"] = "tag" + __slots__ = ( "object", "tag", @@ -43,24 +63,37 @@ class TagObject(base.Object): binsha: bytes, object: Union[None, base.Object] = None, tag: Union[None, str] = None, - tagger: Union[None, "Actor"] = None, + tagger: Union[None, Actor] = None, tagged_date: Union[int, None] = None, tagger_tz_offset: Union[int, None] = None, message: Union[str, None] = None, ) -> None: # @ReservedAssignment - """Initialize a tag object with additional data + """Initialize a tag object with additional data. 
+ + :param repo: + Repository this object is located in. + + :param binsha: + 20 byte SHA1. + + :param object: + :class:`~git.objects.base.Object` instance of object we are pointing to. + + :param tag: + Name of this tag. + + :param tagger: + :class:`~git.util.Actor` identifying the tagger. - :param repo: repository this object is located in - :param binsha: 20 byte SHA1 - :param object: Object instance of object we are pointing to - :param tag: name of this tag - :param tagger: Actor identifying the tagger :param tagged_date: int_seconds_since_epoch - is the DateTime of the tag creation - use time.gmtime to convert - it into a different format - :param tagged_tz_offset: int_seconds_west_of_utc is the timezone that the - authored_date is in, in a format similar to time.altzone""" - super(TagObject, self).__init__(repo, binsha) + The DateTime of the tag creation. + Use :func:`time.gmtime` to convert it into a different format. + + :param tagger_tz_offset: int_seconds_west_of_utc + The timezone that the `tagged_date` is in, in a format similar to + :attr:`time.altzone`. + """ + super().__init__(repo, binsha) if object is not None: self.object: Union["Commit", "Blob", "Tree", "TagObject"] = object if tag is not None: @@ -75,7 +108,7 @@ class TagObject(base.Object): self.message = message def _set_cache_(self, attr: str) -> None: - """Cache all our attributes at once""" + """Cache all our attributes at once.""" if attr in TagObject.__slots__: ostream = self.repo.odb.stream(self.binsha) lines: List[str] = ostream.read().decode(defenc, "replace").splitlines() @@ -95,13 +128,13 @@ class TagObject(base.Object): self.tagger_tz_offset, ) = parse_actor_and_date(tagger_info) - # line 4 empty - it could mark the beginning of the next header - # in case there really is no message, it would not exist. Otherwise - # a newline separates header from message + # Line 4 empty - it could mark the beginning of the next header. + # In case there really is no message, it would not exist. 
+ # Otherwise a newline separates header from message. if len(lines) > 5: self.message = "\n".join(lines[5:]) else: self.message = "" # END check our attributes else: - super(TagObject, self)._set_cache_(attr) + super()._set_cache_(attr) diff --git a/git/objects/tree.py b/git/objects/tree.py index 4f490af..09184a7 100644 --- a/git/objects/tree.py +++ b/git/objects/tree.py @@ -1,20 +1,20 @@ -# tree.py # Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors # -# This module is part of GitPython and is released under -# the BSD License: https://opensource.org/license/bsd-3-clause/ +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ + +__all__ = ["TreeModifier", "Tree"] + +import sys -from git.util import IterableList, join_path import git.diff as git_diff -from git.util import to_bin_sha +from git.util import IterableList, join_path, to_bin_sha from . import util -from .base import IndexObject, IndexObjUnion +from .base import IndexObjUnion, IndexObject from .blob import Blob -from .submodule.base import Submodule - from .fun import tree_entries_from_data, tree_to_stream - +from .submodule.base import Submodule # typing ------------------------------------------------- @@ -26,90 +26,43 @@ from typing import ( Iterator, List, Tuple, + TYPE_CHECKING, Type, Union, cast, - TYPE_CHECKING, ) -from git.types import PathLike, Literal +if sys.version_info >= (3, 8): + from typing import Literal +else: + from typing_extensions import Literal + +from git.types import PathLike if TYPE_CHECKING: - from git.repo import Repo from io import BytesIO + from git.repo import Repo + TreeCacheTup = Tuple[bytes, int, str] TraversedTreeTup = Union[Tuple[Union["Tree", None], IndexObjUnion, Tuple["Submodule", "Submodule"]]] - -# def is_tree_cache(inp: Tuple[bytes, int, str]) -> TypeGuard[TreeCacheTup]: -# return isinstance(inp[0], bytes) and isinstance(inp[1], int) and isinstance([inp], str) - 
# -------------------------------------------------------- - cmp: Callable[[str, str], int] = lambda a, b: (a > b) - (a < b) -__all__ = ("TreeModifier", "Tree") - - -def git_cmp(t1: TreeCacheTup, t2: TreeCacheTup) -> int: - a, b = t1[2], t2[2] - # assert isinstance(a, str) and isinstance(b, str) - len_a, len_b = len(a), len(b) - min_len = min(len_a, len_b) - min_cmp = cmp(a[:min_len], b[:min_len]) - - if min_cmp: - return min_cmp - - return len_a - len_b - - -def merge_sort(a: List[TreeCacheTup], cmp: Callable[[TreeCacheTup, TreeCacheTup], int]) -> None: - if len(a) < 2: - return None - - mid = len(a) // 2 - lefthalf = a[:mid] - righthalf = a[mid:] - - merge_sort(lefthalf, cmp) - merge_sort(righthalf, cmp) - - i = 0 - j = 0 - k = 0 - - while i < len(lefthalf) and j < len(righthalf): - if cmp(lefthalf[i], righthalf[j]) <= 0: - a[k] = lefthalf[i] - i = i + 1 - else: - a[k] = righthalf[j] - j = j + 1 - k = k + 1 - - while i < len(lefthalf): - a[k] = lefthalf[i] - i = i + 1 - k = k + 1 - - while j < len(righthalf): - a[k] = righthalf[j] - j = j + 1 - k = k + 1 - -class TreeModifier(object): +class TreeModifier: + """A utility class providing methods to alter the underlying cache in a list-like + fashion. - """A utility class providing methods to alter the underlying cache in a list-like fashion. - - Once all adjustments are complete, the _cache, which really is a reference to - the cache of a tree, will be sorted. Assuring it will be in a serializable state""" + Once all adjustments are complete, the :attr:`_cache`, which really is a reference + to the cache of a tree, will be sorted. This ensures it will be in a serializable + state. + """ - __slots__ = "_cache" + __slots__ = ("_cache",) def __init__(self, cache: List[TreeCacheTup]) -> None: self._cache = cache @@ -126,27 +79,39 @@ class TreeModifier(object): # { Interface def set_done(self) -> "TreeModifier": """Call this method once you are done modifying the tree information. 
- It may be called several times, but be aware that each call will cause - a sort operation - :return self:""" - merge_sort(self._cache, git_cmp) + This may be called several times, but be aware that each call will cause a sort + operation. + + :return: + self + """ + self._cache.sort(key=lambda x: (x[2] + "/") if x[1] == Tree.tree_id << 12 else x[2]) return self # } END interface # { Mutators def add(self, sha: bytes, mode: int, name: str, force: bool = False) -> "TreeModifier": - """Add the given item to the tree. If an item with the given name already - exists, nothing will be done, but a ValueError will be raised if the - sha and mode of the existing item do not match the one you add, unless - force is True - - :param sha: The 20 or 40 byte sha of the item to add - :param mode: int representing the stat compatible mode of the item - :param force: If True, an item with your name and information will overwrite - any existing item with the same name, no matter which information it has - :return: self""" + """Add the given item to the tree. + + If an item with the given name already exists, nothing will be done, but a + :exc:`ValueError` will be raised if the sha and mode of the existing item do not + match the one you add, unless `force` is ``True``. + + :param sha: + The 20 or 40 byte sha of the item to add. + + :param mode: + :class:`int` representing the stat-compatible mode of the item. + + :param force: + If ``True``, an item with your name and information will overwrite any + existing item with the same name, no matter which information it has. 
+ + :return: + self + """ if "/" in name: raise ValueError("Name must not contain '/' characters") if (mode >> 12) not in Tree._map_id_to_type: @@ -156,7 +121,6 @@ class TreeModifier(object): index = self._index_by_name(name) item = (sha, mode, name) - # assert is_tree_cache(item) if index == -1: self._cache.append(item) @@ -173,18 +137,21 @@ class TreeModifier(object): return self def add_unchecked(self, binsha: bytes, mode: int, name: str) -> None: - """Add the given item to the tree, its correctness is assumed, which - puts the caller into responsibility to assure the input is correct. - For more information on the parameters, see ``add`` + """Add the given item to the tree. Its correctness is assumed, so it is the + caller's responsibility to ensure that the input is correct. + + For more information on the parameters, see :meth:`add`. - :param binsha: 20 byte binary sha""" + :param binsha: + 20 byte binary sha. + """ assert isinstance(binsha, bytes) and isinstance(mode, int) and isinstance(name, str) tree_cache = (binsha, mode, name) self._cache.append(tree_cache) def __delitem__(self, name: str) -> None: - """Deletes an item with the given name if it exists""" + """Delete an item with the given name if it exists.""" index = self._index_by_name(name) if index > -1: del self._cache[index] @@ -193,23 +160,24 @@ class TreeModifier(object): class Tree(IndexObject, git_diff.Diffable, util.Traversable, util.Serializable): + R"""Tree objects represent an ordered list of :class:`~git.objects.blob.Blob`\s and + other :class:`Tree`\s. - """Tree objects represent an ordered list of Blobs and other Trees. - - ``Tree as a list``:: + See :manpage:`gitglossary(7)` on "tree object": + https://git-scm.com/docs/gitglossary#def_tree_object - Access a specific blob using the - tree['filename'] notation. + Subscripting is supported, as with a list or dict: - You may as well access by index - blob = tree[0] + * Access a specific blob using the ``tree["filename"]`` notation. 
+ * You may likewise access by index, like ``blob = tree[0]``. """ type: Literal["tree"] = "tree" - __slots__ = "_cache" - # actual integer ids for comparison - commit_id = 0o16 # equals stat.S_IFDIR | stat.S_IFLNK - a directory link + __slots__ = ("_cache",) + + # Actual integer IDs for comparison. + commit_id = 0o16 # Equals stat.S_IFDIR | stat.S_IFLNK - a directory link. blob_id = 0o10 symlink_id = 0o12 tree_id = 0o04 @@ -217,8 +185,8 @@ class Tree(IndexObject, git_diff.Diffable, util.Traversable, util.Serializable): _map_id_to_type: Dict[int, Type[IndexObjUnion]] = { commit_id: Submodule, blob_id: Blob, - symlink_id: Blob - # tree id added once Tree is defined + symlink_id: Blob, + # Tree ID added once Tree is defined. } def __init__( @@ -228,7 +196,7 @@ class Tree(IndexObject, git_diff.Diffable, util.Traversable, util.Serializable): mode: int = tree_id << 12, path: Union[PathLike, None] = None, ): - super(Tree, self).__init__(repo, binsha, mode, path) + super().__init__(repo, binsha, mode, path) @classmethod def _get_intermediate_items( @@ -241,16 +209,17 @@ class Tree(IndexObject, git_diff.Diffable, util.Traversable, util.Serializable): def _set_cache_(self, attr: str) -> None: if attr == "_cache": - # Set the data when we need it + # Set the data when we need it. ostream = self.repo.odb.stream(self.binsha) self._cache: List[TreeCacheTup] = tree_entries_from_data(ostream.read()) else: - super(Tree, self)._set_cache_(attr) + super()._set_cache_(attr) # END handle attribute def _iter_convert_to_object(self, iterable: Iterable[TreeCacheTup]) -> Iterator[IndexObjUnion]: - """Iterable yields tuples of (binsha, mode, name), which will be converted - to the respective object representation""" + """Iterable yields tuples of (binsha, mode, name), which will be converted to + the respective object representation. 
+ """ for binsha, mode, name in iterable: path = join_path(self.path, name) try: @@ -260,10 +229,15 @@ class Tree(IndexObject, git_diff.Diffable, util.Traversable, util.Serializable): # END for each item def join(self, file: str) -> IndexObjUnion: - """Find the named object in this tree's contents + """Find the named object in this tree's contents. + + :return: + :class:`~git.objects.blob.Blob`, :class:`Tree`, or + :class:`~git.objects.submodule.base.Submodule` - :return: ``git.Blob`` or ``git.Tree`` or ``git.Submodule`` - :raise KeyError: if given file or tree does not exist in tree""" + :raise KeyError: + If the given file or tree does not exist in this tree. + """ msg = "Blob or Tree named %r not found" if "/" in file: tree = self @@ -274,7 +248,7 @@ class Tree(IndexObject, git_diff.Diffable, util.Traversable, util.Serializable): if item.type == "tree": tree = item else: - # safety assertion - blobs are at the end of the path + # Safety assertion - blobs are at the end of the path. if i != len(tokens) - 1: raise KeyError(msg % file) return item @@ -294,30 +268,38 @@ class Tree(IndexObject, git_diff.Diffable, util.Traversable, util.Serializable): # END handle long paths def __truediv__(self, file: str) -> IndexObjUnion: - """For PY3 only""" + """The ``/`` operator is another syntax for joining. + + See :meth:`join` for details. + """ return self.join(file) @property def trees(self) -> List["Tree"]: - """:return: list(Tree, ...) list of trees directly below this tree""" + """:return: list(Tree, ...) List of trees directly below this tree""" return [i for i in self if i.type == "tree"] @property def blobs(self) -> List[Blob]: - """:return: list(Blob, ...) list of blobs directly below this tree""" + """:return: list(Blob, ...) List of blobs directly below this tree""" return [i for i in self if i.type == "blob"] @property def cache(self) -> TreeModifier: """ - :return: An object allowing to modify the internal cache. This can be used - to change the tree's contents. 
When done, make sure you call ``set_done`` - on the tree modifier, or serialization behaviour will be incorrect. - See the ``TreeModifier`` for more information on how to alter the cache""" + :return: + An object allowing modification of the internal cache. This can be used to + change the tree's contents. When done, make sure you call + :meth:`~TreeModifier.set_done` on the tree modifier, or serialization + behaviour will be incorrect. + + :note: + See :class:`TreeModifier` for more information on how to alter the cache. + """ return TreeModifier(self._cache) def traverse( - self, # type: ignore[override] + self, predicate: Callable[[Union[IndexObjUnion, TraversedTreeTup], int], bool] = lambda i, d: True, prune: Callable[[Union[IndexObjUnion, TraversedTreeTup], int], bool] = lambda i, d: False, depth: int = -1, @@ -326,25 +308,29 @@ class Tree(IndexObject, git_diff.Diffable, util.Traversable, util.Serializable): ignore_self: int = 1, as_edge: bool = False, ) -> Union[Iterator[IndexObjUnion], Iterator[TraversedTreeTup]]: - """For documentation, see util.Traversable._traverse() - Trees are set to visit_once = False to gain more performance in the traversal""" + """For documentation, see + `Traversable._traverse() <git.objects.util.Traversable._traverse>`. + + Trees are set to ``visit_once = False`` to gain more performance in the + traversal. + """ - # """ # # To typecheck instead of using cast. 
# import itertools # def is_tree_traversed(inp: Tuple) -> TypeGuard[Tuple[Iterator[Union['Tree', 'Blob', 'Submodule']]]]: # return all(isinstance(x, (Blob, Tree, Submodule)) for x in inp[1]) - # ret = super(Tree, self).traverse(predicate, prune, depth, branch_first, visit_once, ignore_self) + # ret = super().traverse(predicate, prune, depth, branch_first, visit_once, ignore_self) # ret_tup = itertools.tee(ret, 2) # assert is_tree_traversed(ret_tup), f"Type is {[type(x) for x in list(ret_tup[0])]}" - # return ret_tup[0]""" + # return ret_tup[0] + return cast( Union[Iterator[IndexObjUnion], Iterator[TraversedTreeTup]], - super(Tree, self)._traverse( - predicate, - prune, - depth, # type: ignore + super()._traverse( + predicate, # type: ignore[arg-type] + prune, # type: ignore[arg-type] + depth, branch_first, visit_once, ignore_self, @@ -353,11 +339,13 @@ class Tree(IndexObject, git_diff.Diffable, util.Traversable, util.Serializable): def list_traverse(self, *args: Any, **kwargs: Any) -> IterableList[IndexObjUnion]: """ - :return: IterableList with the results of the traversal as produced by - traverse() - Tree -> IterableList[Union['Submodule', 'Tree', 'Blob']] + :return: + :class:`~git.util.IterableList` with the results of the traversal as + produced by :meth:`traverse` + + Tree -> IterableList[Union[Submodule, Tree, Blob]] """ - return super(Tree, self)._list_traverse(*args, **kwargs) + return super()._list_traverse(*args, **kwargs) # List protocol @@ -392,7 +380,7 @@ class Tree(IndexObject, git_diff.Diffable, util.Traversable, util.Serializable): # END handle item is index object # compatibility - # treat item as repo-relative path + # Treat item as repo-relative path. 
else: path = self.path for info in self._cache: @@ -402,13 +390,16 @@ class Tree(IndexObject, git_diff.Diffable, util.Traversable, util.Serializable): return False def __reversed__(self) -> Iterator[IndexObjUnion]: - return reversed(self._iter_convert_to_object(self._cache)) # type: ignore + return reversed(self._iter_convert_to_object(self._cache)) # type: ignore[call-overload] def _serialize(self, stream: "BytesIO") -> "Tree": - """Serialize this tree into the stream. Please note that we will assume - our tree data to be in a sorted state. If this is not the case, serialization - will not generate a correct tree representation as these are assumed to be sorted - by algorithms""" + """Serialize this tree into the stream. Assumes sorted tree data. + + :note: + We will assume our tree data to be in a sorted state. If this is not the + case, serialization will not generate a correct tree representation as these + are assumed to be sorted by algorithms. + """ tree_to_stream(self._cache, stream.write) return self @@ -419,6 +410,5 @@ class Tree(IndexObject, git_diff.Diffable, util.Traversable, util.Serializable): # END tree -# finalize map definition +# Finalize map definition. 
Tree._map_id_to_type[Tree.tree_id] = Tree -# diff --git a/git/objects/util.py b/git/objects/util.py index 992a53d..a68d701 100644 --- a/git/objects/util.py +++ b/git/objects/util.py @@ -1,55 +1,67 @@ -# util.py # Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors # -# This module is part of GitPython and is released under -# the BSD License: https://opensource.org/license/bsd-3-clause/ -"""Module for general utility functions""" -# flake8: noqa F401 +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ +"""Utility functions for working with git objects.""" -from abc import ABC, abstractmethod -import warnings -from git.util import IterableList, IterableObj, Actor +__all__ = [ + "get_object_type_by_name", + "parse_date", + "parse_actor_and_date", + "ProcessStreamAdapter", + "Traversable", + "altz_to_utctz_str", + "utctz_to_altz", + "verify_utctz", + "Actor", + "tzoffset", + "utc", +] -import re +from abc import ABC, abstractmethod +import calendar from collections import deque - +from datetime import datetime, timedelta, tzinfo +import re from string import digits import time -import calendar -from datetime import datetime, timedelta, tzinfo +import warnings + +from git.util import Actor, IterableList, IterableObj # typing ------------------------------------------------------------ + from typing import ( Any, Callable, Deque, Iterator, - Generic, NamedTuple, - overload, - Sequence, # NOQA: F401 + Sequence, TYPE_CHECKING, Tuple, Type, TypeVar, Union, cast, + overload, ) -from git.types import Has_id_attribute, Literal, _T # NOQA: F401 +from git.types import Has_id_attribute, Literal if TYPE_CHECKING: from io import BytesIO, StringIO - from .commit import Commit - from .blob import Blob - from .tag import TagObject - from .tree import Tree, TraversedTreeTup from subprocess import Popen - from .submodule.base import Submodule + from git.types import Protocol, 
runtime_checkable + + from .blob import Blob + from .commit import Commit + from .submodule.base import Submodule + from .tag import TagObject + from .tree import TraversedTreeTup, Tree else: - # Protocol = Generic[_T] # Needed for typing bug #572? Protocol = ABC def runtime_checkable(f): @@ -62,42 +74,32 @@ class TraverseNT(NamedTuple): src: Union["Traversable", None] -T_TIobj = TypeVar("T_TIobj", bound="TraversableIterableObj") # for TraversableIterableObj.traverse() +T_TIobj = TypeVar("T_TIobj", bound="TraversableIterableObj") # For TraversableIterableObj.traverse() TraversedTup = Union[ - Tuple[Union["Traversable", None], "Traversable"], # for commit, submodule - "TraversedTreeTup", -] # for tree.traverse() + Tuple[Union["Traversable", None], "Traversable"], # For Commit, Submodule. + "TraversedTreeTup", # For Tree.traverse(). +] # -------------------------------------------------------------------- -__all__ = ( - "get_object_type_by_name", - "parse_date", - "parse_actor_and_date", - "ProcessStreamAdapter", - "Traversable", - "altz_to_utctz_str", - "utctz_to_altz", - "verify_utctz", - "Actor", - "tzoffset", - "utc", -) - ZERO = timedelta(0) # { Functions def mode_str_to_int(modestr: Union[bytes, str]) -> int: - """ - :param modestr: string like 755 or 644 or 100644 - only the last 6 chars will be used + """Convert mode bits from an octal mode string to an integer mode for git. + + :param modestr: + String like ``755`` or ``644`` or ``100644`` - only the last 6 chars will be + used. + :return: - String identifying a mode compatible to the mode methods ids of the - stat module regarding the rwx permissions for user, group and other, - special flags and file system flags, i.e. whether it is a symlink - for example.""" + String identifying a mode compatible to the mode methods ids of the :mod:`stat` + module regarding the rwx permissions for user, group and other, special flags + and file system flags, such as whether it is a symlink. 
+ """ mode = 0 for iteration, char in enumerate(reversed(modestr[-6:])): char = cast(Union[str, int], char) @@ -109,13 +111,18 @@ def mode_str_to_int(modestr: Union[bytes, str]) -> int: def get_object_type_by_name( object_type_name: bytes, ) -> Union[Type["Commit"], Type["TagObject"], Type["Tree"], Type["Blob"]]: - """ - :return: type suitable to handle the given object type name. - Use the type to create new instances. + """Retrieve the Python class GitPython uses to represent a kind of Git object. + + :return: + A type suitable to handle the given as `object_type_name`. + This type can be called create new instances. - :param object_type_name: Member of TYPES + :param object_type_name: + Member of :attr:`Object.TYPES <git.objects.base.Object.TYPES>`. - :raise ValueError: In case object_type_name is unknown""" + :raise ValueError: + If `object_type_name` is unknown. + """ if object_type_name == b"commit": from . import commit @@ -137,10 +144,11 @@ def get_object_type_by_name( def utctz_to_altz(utctz: str) -> int: - """Convert a git timezone offset into a timezone offset west of - UTC in seconds (compatible with time.altzone). + """Convert a git timezone offset into a timezone offset west of UTC in seconds + (compatible with :attr:`time.altzone`). - :param utctz: git utc timezone string, i.e. +0200 + :param utctz: + git utc timezone string, e.g. +0200 """ int_utctz = int(utctz) seconds = (abs(int_utctz) // 100) * 3600 + (abs(int_utctz) % 100) * 60 @@ -148,9 +156,11 @@ def utctz_to_altz(utctz: str) -> int: def altz_to_utctz_str(altz: float) -> str: - """Convert a timezone offset west of UTC in seconds into a git timezone offset string + """Convert a timezone offset west of UTC in seconds into a Git timezone offset + string. - :param altz: timezone offset in seconds west of UTC + :param altz: + Timezone offset in seconds west of UTC. 
""" hours = abs(altz) // 3600 minutes = (abs(altz) % 3600) // 60 @@ -159,8 +169,13 @@ def altz_to_utctz_str(altz: float) -> str: def verify_utctz(offset: str) -> str: - """:raise ValueError: if offset is incorrect - :return: offset""" + """ + :raise ValueError: + If `offset` is incorrect. + + :return: + `offset` + """ fmt_exc = ValueError("Invalid timezone offset format: %s" % offset) if len(offset) != 5: raise fmt_exc @@ -194,7 +209,8 @@ utc = tzoffset(0, "UTC") def from_timestamp(timestamp: float, tz_offset: float) -> datetime: - """Converts a timestamp + tz_offset into an aware datetime instance.""" + """Convert a `timestamp` + `tz_offset` into an aware :class:`~datetime.datetime` + instance.""" utc_dt = datetime.fromtimestamp(timestamp, utc) try: local_dt = utc_dt.astimezone(tzoffset(tz_offset)) @@ -204,18 +220,21 @@ def from_timestamp(timestamp: float, tz_offset: float) -> datetime: def parse_date(string_date: Union[str, datetime]) -> Tuple[int, int]: - """ - Parse the given date as one of the following + """Parse the given date as one of the following: - * aware datetime instance + * Aware datetime instance * Git internal format: timestamp offset - * RFC 2822: Thu, 07 Apr 2005 22:13:13 +0200. - * ISO 8601 2005-04-07T22:13:13 - The T can be a space as well + * :rfc:`2822`: ``Thu, 07 Apr 2005 22:13:13 +0200`` + * ISO 8601: ``2005-04-07T22:13:13`` - The ``T`` can be a space as well. + + :return: + Tuple(int(timestamp_UTC), int(offset)), both in seconds since epoch - :return: Tuple(int(timestamp_UTC), int(offset)), both in seconds since epoch - :raise ValueError: If the format could not be understood - :note: Date can also be YYYY.MM.DD, MM/DD/YYYY and DD.MM.YYYY. + :raise ValueError: + If the format could not be understood. + + :note: + Date can also be ``YYYY.MM.DD``, ``MM/DD/YYYY`` and ``DD.MM.YYYY``. 
""" if isinstance(string_date, datetime): if string_date.tzinfo: @@ -225,7 +244,7 @@ def parse_date(string_date: Union[str, datetime]) -> Tuple[int, int]: else: raise ValueError(f"string_date datetime object without tzinfo, {string_date}") - # git time + # Git time try: if string_date.count(" ") == 1 and string_date.rfind(":") == -1: timestamp, offset_str = string_date.split() @@ -234,21 +253,21 @@ def parse_date(string_date: Union[str, datetime]) -> Tuple[int, int]: timestamp_int = int(timestamp) return timestamp_int, utctz_to_altz(verify_utctz(offset_str)) else: - offset_str = "+0000" # local time by default + offset_str = "+0000" # Local time by default. if string_date[-5] in "-+": offset_str = verify_utctz(string_date[-5:]) string_date = string_date[:-6] # skip space as well # END split timezone info offset = utctz_to_altz(offset_str) - # now figure out the date and time portion - split time + # Now figure out the date and time portion - split time. date_formats = [] splitter = -1 if "," in string_date: date_formats.append("%a, %d %b %Y") splitter = string_date.rfind(" ") else: - # iso plus additional + # ISO plus additional date_formats.append("%Y-%m-%d") date_formats.append("%Y.%m.%d") date_formats.append("%m/%d/%Y") @@ -258,15 +277,15 @@ def parse_date(string_date: Union[str, datetime]) -> Tuple[int, int]: if splitter == -1: splitter = string_date.rfind(" ") # END handle 'T' and ' ' - # END handle rfc or iso + # END handle RFC or ISO assert splitter > -1 - # split date and time - time_part = string_date[splitter + 1 :] # skip space + # Split date and time. + time_part = string_date[splitter + 1 :] # Skip space. date_part = string_date[:splitter] - # parse time + # Parse time. tstruct = time.strptime(time_part, "%H:%M:%S") for fmt in date_formats: @@ -291,7 +310,7 @@ def parse_date(string_date: Union[str, datetime]) -> Tuple[int, int]: # END exception handling # END for each fmt - # still here ? fail + # Still here ? fail. 
raise ValueError("no format matched") # END handle format except Exception as e: @@ -299,7 +318,7 @@ def parse_date(string_date: Union[str, datetime]) -> Tuple[int, int]: # END handle exceptions -# precompiled regex +# Precompiled regexes _re_actor_epoch = re.compile(r"^.+? (.*) (\d+) ([+-]\d+).*$") _re_only_actor = re.compile(r"^.+? (.*)$") @@ -309,7 +328,9 @@ def parse_actor_and_date(line: str) -> Tuple[Actor, int, int]: author Tom Preston-Werner <tom@mojombo.com> 1191999972 -0700 - :return: [Actor, int_seconds_since_epoch, int_timezone_offset]""" + :return: + [Actor, int_seconds_since_epoch, int_timezone_offset] + """ actor, epoch, offset = "", "0", "0" m = _re_actor_epoch.search(line) if m: @@ -326,13 +347,13 @@ def parse_actor_and_date(line: str) -> Tuple[Actor, int, int]: # { Classes -class ProcessStreamAdapter(object): - - """Class wireing all calls to the contained Process instance. +class ProcessStreamAdapter: + """Class wiring all calls to the contained Process instance. Use this type to hide the underlying process to provide access only to a specified - stream. The process is usually wrapped into an AutoInterrupt class to kill - it if the instance goes out of scope.""" + stream. The process is usually wrapped into an :class:`~git.cmd.Git.AutoInterrupt` + class to kill it if the instance goes out of scope. + """ __slots__ = ("_proc", "_stream") @@ -346,13 +367,18 @@ class ProcessStreamAdapter(object): @runtime_checkable class Traversable(Protocol): + """Simple interface to perform depth-first or breadth-first traversals in one + direction. - """Simple interface to perform depth-first or breadth-first traversals - into one direction. Subclasses only need to implement one function. - Instances of the Subclass must be hashable - Defined subclasses = [Commit, Tree, SubModule] + Instances of the subclass must be hashable. 
+ + Defined subclasses: + + * :class:`Commit <git.objects.Commit>` + * :class:`Tree <git.objects.tree.Tree>` + * :class:`Submodule <git.objects.submodule.base.Submodule>` """ __slots__ = () @@ -361,9 +387,9 @@ class Traversable(Protocol): @abstractmethod def _get_intermediate_items(cls, item: Any) -> Sequence["Traversable"]: """ - Returns: + :return: Tuple of items connected to the given item. - Must be implemented in subclass + Must be implemented in subclass. class Commit:: (cls, Commit) -> Tuple[Commit, ...] class Submodule:: (cls, Submodule) -> Iterablelist[Submodule] @@ -373,11 +399,15 @@ class Traversable(Protocol): @abstractmethod def list_traverse(self, *args: Any, **kwargs: Any) -> Any: - """ """ + """Traverse self and collect all items found. + + Calling this directly on the abstract base class, including via a ``super()`` + proxy, is deprecated. Only overridden implementations should be called. + """ warnings.warn( "list_traverse() method should only be called from subclasses." - "Calling from Traversable abstract class will raise NotImplementedError in 3.1.20" - "Builtin sublclasses are 'Submodule', 'Tree' and 'Commit", + " Calling from Traversable abstract class will raise NotImplementedError in 4.0.0." + " The concrete subclasses in GitPython itself are 'Commit', 'RootModule', 'Submodule', and 'Tree'.", DeprecationWarning, stacklevel=2, ) @@ -386,39 +416,49 @@ class Traversable(Protocol): def _list_traverse( self, as_edge: bool = False, *args: Any, **kwargs: Any ) -> IterableList[Union["Commit", "Submodule", "Tree", "Blob"]]: + """Traverse self and collect all items found. 
+ + :return: + :class:`~git.util.IterableList` with the results of the traversal as + produced by :meth:`traverse`:: + + Commit -> IterableList[Commit] + Submodule -> IterableList[Submodule] + Tree -> IterableList[Union[Submodule, Tree, Blob]] """ - :return: IterableList with the results of the traversal as produced by - traverse() - Commit -> IterableList['Commit'] - Submodule -> IterableList['Submodule'] - Tree -> IterableList[Union['Submodule', 'Tree', 'Blob']] - """ - # Commit and Submodule have id.__attribute__ as IterableObj - # Tree has id.__attribute__ inherited from IndexObject + # Commit and Submodule have id.__attribute__ as IterableObj. + # Tree has id.__attribute__ inherited from IndexObject. if isinstance(self, Has_id_attribute): id = self._id_attribute_ else: - id = "" # shouldn't reach here, unless Traversable subclass created with no _id_attribute_ - # could add _id_attribute_ to Traversable, or make all Traversable also Iterable? + # Shouldn't reach here, unless Traversable subclass created with no + # _id_attribute_. + id = "" + # Could add _id_attribute_ to Traversable, or make all Traversable also + # Iterable? if not as_edge: out: IterableList[Union["Commit", "Submodule", "Tree", "Blob"]] = IterableList(id) - out.extend(self.traverse(as_edge=as_edge, *args, **kwargs)) + out.extend(self.traverse(as_edge=as_edge, *args, **kwargs)) # noqa: B026 return out - # overloads in subclasses (mypy doesn't allow typing self: subclass) + # Overloads in subclasses (mypy doesn't allow typing self: subclass). # Union[IterableList['Commit'], IterableList['Submodule'], IterableList[Union['Submodule', 'Tree', 'Blob']]] else: - # Raise deprecationwarning, doesn't make sense to use this + # Raise DeprecationWarning, it doesn't make sense to use this. 
out_list: IterableList = IterableList(self.traverse(*args, **kwargs)) return out_list @abstractmethod def traverse(self, *args: Any, **kwargs: Any) -> Any: - """ """ + """Iterator yielding items found when traversing self. + + Calling this directly on the abstract base class, including via a ``super()`` + proxy, is deprecated. Only overridden implementations should be called. + """ warnings.warn( "traverse() method should only be called from subclasses." - "Calling from Traversable abstract class will raise NotImplementedError in 3.1.20" - "Builtin sublclasses are 'Submodule', 'Tree' and 'Commit", + " Calling from Traversable abstract class will raise NotImplementedError in 4.0.0." + " The concrete subclasses in GitPython itself are 'Commit', 'RootModule', 'Submodule', and 'Tree'.", DeprecationWarning, stacklevel=2, ) @@ -434,50 +474,57 @@ class Traversable(Protocol): ignore_self: int = 1, as_edge: bool = False, ) -> Union[Iterator[Union["Traversable", "Blob"]], Iterator[TraversedTup]]: - """:return: iterator yielding of items found when traversing self - :param predicate: f(i,d) returns False if item i at depth d should not be included in the result + """Iterator yielding items found when traversing `self`. + + :param predicate: + A function ``f(i,d)`` that returns ``False`` if item i at depth ``d`` should + not be included in the result. :param prune: - f(i,d) return True if the search should stop at item i at depth d. - Item i will not be returned. + A function ``f(i,d)`` that returns ``True`` if the search should stop at + item ``i`` at depth ``d``. Item ``i`` will not be returned. :param depth: - define at which level the iteration should not go deeper - if -1, there is no limit - if 0, you would effectively only get self, the root of the iteration - i.e. if 1, you would only get the first level of predecessors/successors + Defines at which level the iteration should not go deeper if -1. 
There is no + limit if 0, you would effectively only get `self`, the root of the + iteration. If 1, you would only get the first level of + predecessors/successors. :param branch_first: - if True, items will be returned branch first, otherwise depth first + If ``True``, items will be returned branch first, otherwise depth first. :param visit_once: - if True, items will only be returned once, although they might be encountered - several times. Loops are prevented that way. + If ``True``, items will only be returned once, although they might be + encountered several times. Loops are prevented that way. :param ignore_self: - if True, self will be ignored and automatically pruned from - the result. Otherwise it will be the first item to be returned. - If as_edge is True, the source of the first edge is None + If ``True``, `self` will be ignored and automatically pruned from the + result. Otherwise it will be the first item to be returned. If `as_edge` is + ``True``, the source of the first edge is ``None``. :param as_edge: - if True, return a pair of items, first being the source, second the - destination, i.e. tuple(src, dest) with the edge spanning from - source to destination""" - + If ``True``, return a pair of items, first being the source, second the + destination, i.e. tuple(src, dest) with the edge spanning from source to + destination. 
+ + :return: + Iterator yielding items found when traversing `self`:: + + Commit -> Iterator[Union[Commit, Tuple[Commit, Commit]] Submodule -> + Iterator[Submodule, Tuple[Submodule, Submodule]] Tree -> + Iterator[Union[Blob, Tree, Submodule, + Tuple[Union[Submodule, Tree], Union[Blob, Tree, + Submodule]]] + + ignore_self=True is_edge=True -> Iterator[item] ignore_self=True + is_edge=False --> Iterator[item] ignore_self=False is_edge=True -> + Iterator[item] | Iterator[Tuple[src, item]] ignore_self=False + is_edge=False -> Iterator[Tuple[src, item]] """ - Commit -> Iterator[Union[Commit, Tuple[Commit, Commit]] - Submodule -> Iterator[Submodule, Tuple[Submodule, Submodule]] - Tree -> Iterator[Union[Blob, Tree, Submodule, - Tuple[Union[Submodule, Tree], Union[Blob, Tree, Submodule]]] - - ignore_self=True is_edge=True -> Iterator[item] - ignore_self=True is_edge=False --> Iterator[item] - ignore_self=False is_edge=True -> Iterator[item] | Iterator[Tuple[src, item]] - ignore_self=False is_edge=False -> Iterator[Tuple[src, item]]""" visited = set() stack: Deque[TraverseNT] = deque() - stack.append(TraverseNT(0, self, None)) # self is always depth level 0 + stack.append(TraverseNT(0, self, None)) # self is always depth level 0. 
def addToStack( stack: Deque[TraverseNT], @@ -486,8 +533,8 @@ class Traversable(Protocol): depth: int, ) -> None: lst = self._get_intermediate_items(item) - if not lst: # empty list - return None + if not lst: # Empty list + return if branch_first: stack.extendleft(TraverseNT(depth, i, src_item) for i in lst) else: @@ -497,7 +544,7 @@ class Traversable(Protocol): # END addToStack local method while stack: - d, item, src = stack.pop() # depth of item, item, item_source + d, item, src = stack.pop() # Depth of item, item, item_source if visit_once and item in visited: continue @@ -506,7 +553,9 @@ class Traversable(Protocol): visited.add(item) rval: Union[TraversedTup, "Traversable", "Blob"] - if as_edge: # if as_edge return (src, item) unless rrc is None (e.g. for first item) + if as_edge: + # If as_edge return (src, item) unless rrc is None + # (e.g. for first item). rval = (src, item) else: rval = item @@ -518,35 +567,47 @@ class Traversable(Protocol): if not skipStartItem and predicate(rval, d): yield rval - # only continue to next level if this is appropriate ! - nd = d + 1 - if depth > -1 and nd > depth: + # Only continue to next level if this is appropriate! + next_d = d + 1 + if depth > -1 and next_d > depth: continue - addToStack(stack, item, branch_first, nd) + addToStack(stack, item, branch_first, next_d) # END for each item on work stack @runtime_checkable class Serializable(Protocol): - - """Defines methods to serialize and deserialize objects from and into a data stream""" + """Defines methods to serialize and deserialize objects from and into a data + stream.""" __slots__ = () # @abstractmethod def _serialize(self, stream: "BytesIO") -> "Serializable": - """Serialize the data of this object into the given data stream - :note: a serialized object would ``_deserialize`` into the same object - :param stream: a file-like object - :return: self""" + """Serialize the data of this object into the given data stream. 
+ + :note: + A serialized object would :meth:`_deserialize` into the same object. + + :param stream: + A file-like object. + + :return: + self + """ raise NotImplementedError("To be implemented in subclass") # @abstractmethod def _deserialize(self, stream: "BytesIO") -> "Serializable": - """Deserialize all information regarding this object from the stream - :param stream: a file-like object - :return: self""" + """Deserialize all information regarding this object from the stream. + + :param stream: + A file-like object. + + :return: + self + """ raise NotImplementedError("To be implemented in subclass") @@ -556,11 +617,10 @@ class TraversableIterableObj(IterableObj, Traversable): TIobj_tuple = Tuple[Union[T_TIobj, None], T_TIobj] def list_traverse(self: T_TIobj, *args: Any, **kwargs: Any) -> IterableList[T_TIobj]: - return super(TraversableIterableObj, self)._list_traverse(*args, **kwargs) + return super()._list_traverse(*args, **kwargs) - @overload # type: ignore - def traverse(self: T_TIobj) -> Iterator[T_TIobj]: - ... + @overload + def traverse(self: T_TIobj) -> Iterator[T_TIobj]: ... @overload def traverse( @@ -572,8 +632,7 @@ class TraversableIterableObj(IterableObj, Traversable): visit_once: bool, ignore_self: Literal[True], as_edge: Literal[False], - ) -> Iterator[T_TIobj]: - ... + ) -> Iterator[T_TIobj]: ... @overload def traverse( @@ -585,8 +644,7 @@ class TraversableIterableObj(IterableObj, Traversable): visit_once: bool, ignore_self: Literal[False], as_edge: Literal[True], - ) -> Iterator[Tuple[Union[T_TIobj, None], T_TIobj]]: - ... + ) -> Iterator[Tuple[Union[T_TIobj, None], T_TIobj]]: ... @overload def traverse( @@ -598,8 +656,7 @@ class TraversableIterableObj(IterableObj, Traversable): visit_once: bool, ignore_self: Literal[True], as_edge: Literal[True], - ) -> Iterator[Tuple[T_TIobj, T_TIobj]]: - ... + ) -> Iterator[Tuple[T_TIobj, T_TIobj]]: ... 
def traverse( self: T_TIobj, @@ -611,27 +668,33 @@ class TraversableIterableObj(IterableObj, Traversable): ignore_self: int = 1, as_edge: bool = False, ) -> Union[Iterator[T_TIobj], Iterator[Tuple[T_TIobj, T_TIobj]], Iterator[TIobj_tuple]]: - """For documentation, see util.Traversable._traverse()""" + """For documentation, see :meth:`Traversable._traverse`.""" + + ## To typecheck instead of using cast: + # + # import itertools + # from git.types import TypeGuard + # def is_commit_traversed(inp: Tuple) -> TypeGuard[Tuple[Iterator[Tuple['Commit', 'Commit']]]]: + # for x in inp[1]: + # if not isinstance(x, tuple) and len(x) != 2: + # if all(isinstance(inner, Commit) for inner in x): + # continue + # return True + # + # ret = super(Commit, self).traverse(predicate, prune, depth, branch_first, visit_once, ignore_self, as_edge) + # ret_tup = itertools.tee(ret, 2) + # assert is_commit_traversed(ret_tup), f"{[type(x) for x in list(ret_tup[0])]}" + # return ret_tup[0] - """ - # To typecheck instead of using cast. 
- import itertools - from git.types import TypeGuard - def is_commit_traversed(inp: Tuple) -> TypeGuard[Tuple[Iterator[Tuple['Commit', 'Commit']]]]: - for x in inp[1]: - if not isinstance(x, tuple) and len(x) != 2: - if all(isinstance(inner, Commit) for inner in x): - continue - return True - - ret = super(Commit, self).traverse(predicate, prune, depth, branch_first, visit_once, ignore_self, as_edge) - ret_tup = itertools.tee(ret, 2) - assert is_commit_traversed(ret_tup), f"{[type(x) for x in list(ret_tup[0])]}" - return ret_tup[0] - """ return cast( Union[Iterator[T_TIobj], Iterator[Tuple[Union[None, T_TIobj], T_TIobj]]], - super(TraversableIterableObj, self)._traverse( - predicate, prune, depth, branch_first, visit_once, ignore_self, as_edge # type: ignore + super()._traverse( + predicate, # type: ignore[arg-type] + prune, # type: ignore[arg-type] + depth, + branch_first, + visit_once, + ignore_self, + as_edge, ), ) diff --git a/git/refs/__init__.py b/git/refs/__init__.py index 1486dff..d6157e6 100644 --- a/git/refs/__init__.py +++ b/git/refs/__init__.py @@ -1,9 +1,21 @@ -# flake8: noqa -# import all modules in order, fix the names they require -from .symbolic import * -from .reference import * -from .head import * -from .tag import * -from .remote import * +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ -from .log import * +__all__ = [ + "HEAD", + "Head", + "RefLog", + "RefLogEntry", + "Reference", + "RemoteReference", + "SymbolicReference", + "Tag", + "TagReference", +] + +from .head import HEAD, Head +from .log import RefLog, RefLogEntry +from .reference import Reference +from .remote import RemoteReference +from .symbolic import SymbolicReference +from .tag import Tag, TagReference diff --git a/git/refs/head.py b/git/refs/head.py index 26efc6c..6836344 100644 --- a/git/refs/head.py +++ b/git/refs/head.py @@ -1,25 +1,33 @@ +# This module is part of GitPython and is released 
under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ + +"""Some ref-based objects. + +Note the distinction between the :class:`HEAD` and :class:`Head` classes. +""" + +__all__ = ["HEAD", "Head"] + from git.config import GitConfigParser, SectionConstraint -from git.util import join_path from git.exc import GitCommandError +from git.util import join_path -from .symbolic import SymbolicReference from .reference import Reference +from .symbolic import SymbolicReference -# typinng --------------------------------------------------- +# typing --------------------------------------------------- -from typing import Any, Sequence, Union, TYPE_CHECKING +from typing import Any, Sequence, TYPE_CHECKING, Union -from git.types import PathLike, Commit_ish +from git.types import Commit_ish, PathLike if TYPE_CHECKING: - from git.repo import Repo from git.objects import Commit from git.refs import RemoteReference + from git.repo import Repo # ------------------------------------------------------------------- -__all__ = ["HEAD", "Head"] - def strip_quotes(string: str) -> str: if string.startswith('"') and string.endswith('"'): @@ -28,24 +36,28 @@ def strip_quotes(string: str) -> str: class HEAD(SymbolicReference): - - """Special case of a Symbolic Reference as it represents the repository's - HEAD reference.""" + """Special case of a :class:`~git.refs.symbolic.SymbolicReference` representing the + repository's HEAD reference.""" _HEAD_NAME = "HEAD" _ORIG_HEAD_NAME = "ORIG_HEAD" + __slots__ = () - def __init__(self, repo: "Repo", path: PathLike = _HEAD_NAME): + # TODO: This can be removed once SymbolicReference.commit has static type hints. 
+ commit: "Commit" + + def __init__(self, repo: "Repo", path: PathLike = _HEAD_NAME) -> None: if path != self._HEAD_NAME: raise ValueError("HEAD instance must point to %r, got %r" % (self._HEAD_NAME, path)) - super(HEAD, self).__init__(repo, path) - self.commit: "Commit" + super().__init__(repo, path) def orig_head(self) -> SymbolicReference: """ - :return: SymbolicReference pointing at the ORIG_HEAD, which is maintained - to contain the previous value of HEAD""" + :return: + :class:`~git.refs.symbolic.SymbolicReference` pointing at the ORIG_HEAD, + which is maintained to contain the previous value of HEAD. + """ return SymbolicReference(self.repo, self._ORIG_HEAD_NAME) def reset( @@ -56,38 +68,39 @@ class HEAD(SymbolicReference): paths: Union[PathLike, Sequence[PathLike], None] = None, **kwargs: Any, ) -> "HEAD": - """Reset our HEAD to the given commit optionally synchronizing - the index and working tree. The reference we refer to will be set to - commit as well. + """Reset our HEAD to the given commit optionally synchronizing the index and + working tree. The reference we refer to will be set to commit as well. :param commit: - Commit object, Reference Object or string identifying a revision we - should reset HEAD to. + :class:`~git.objects.commit.Commit`, :class:`~git.refs.reference.Reference`, + or string identifying a revision we should reset HEAD to. :param index: - If True, the index will be set to match the given commit. Otherwise - it will not be touched. + If ``True``, the index will be set to match the given commit. + Otherwise it will not be touched. :param working_tree: - If True, the working tree will be forcefully adjusted to match the given + If ``True``, the working tree will be forcefully adjusted to match the given commit, possibly overwriting uncommitted changes without warning. - If working_tree is True, index must be true as well + If `working_tree` is ``True``, `index` must be ``True`` as well. 
:param paths: Single path or list of paths relative to the git root directory that are to be reset. This allows to partially reset individual files. :param kwargs: - Additional arguments passed to git-reset. + Additional arguments passed to :manpage:`git-reset(1)`. - :return: self""" + :return: + self + """ mode: Union[str, None] mode = "--soft" if index: mode = "--mixed" - # it appears, some git-versions declare mixed and paths deprecated - # see http://github.com/Byron/GitPython/issues#issue/2 + # Explicit "--mixed" when passing paths is deprecated since git 1.5.4. + # See https://github.com/gitpython-developers/GitPython/discussions/1876. if paths: mode = None # END special case @@ -104,7 +117,7 @@ class HEAD(SymbolicReference): self.repo.git.reset(mode, commit, "--", paths, **kwargs) except GitCommandError as e: # git nowadays may use 1 as status to indicate there are still unstaged - # modifications after the reset + # modifications after the reset. if e.status != 1: raise # END handle exception @@ -113,9 +126,8 @@ class HEAD(SymbolicReference): class Head(Reference): - - """A Head is a named reference to a Commit. Every Head instance contains a name - and a Commit object. + """A Head is a named reference to a :class:`~git.objects.commit.Commit`. Every Head + instance contains a name and a :class:`~git.objects.commit.Commit` object. Examples:: @@ -129,33 +141,36 @@ class Head(Reference): <git.Commit "1c09f116cbc2cb4100fb6935bb162daa4723f455"> >>> head.commit.hexsha - '1c09f116cbc2cb4100fb6935bb162daa4723f455'""" + '1c09f116cbc2cb4100fb6935bb162daa4723f455' + """ _common_path_default = "refs/heads" k_config_remote = "remote" - k_config_remote_ref = "merge" # branch to merge from remote + k_config_remote_ref = "merge" # Branch to merge from remote. @classmethod def delete(cls, repo: "Repo", *heads: "Union[Head, str]", force: bool = False, **kwargs: Any) -> None: - """Delete the given heads + """Delete the given heads. 
:param force: - If True, the heads will be deleted even if they are not yet merged into - the main development stream. - Default False""" + If ``True``, the heads will be deleted even if they are not yet merged into + the main development stream. Default ``False``. + """ flag = "-d" if force: flag = "-D" repo.git.branch(flag, *heads) def set_tracking_branch(self, remote_reference: Union["RemoteReference", None]) -> "Head": - """ - Configure this branch to track the given remote reference. This will alter - this branch's configuration accordingly. + """Configure this branch to track the given remote reference. This will + alter this branch's configuration accordingly. + + :param remote_reference: + The remote reference to track or None to untrack any references. - :param remote_reference: The remote reference to track or None to untrack - any references - :return: self""" + :return: + self + """ from .remote import RemoteReference if remote_reference is not None and not isinstance(remote_reference, RemoteReference): @@ -179,8 +194,10 @@ class Head(Reference): def tracking_branch(self) -> Union["RemoteReference", None]: """ - :return: The remote_reference we are tracking, or None if we are - not a tracking branch""" + :return: + The remote reference we are tracking, or ``None`` if we are not a tracking + branch. + """ from .remote import RemoteReference reader = self.config_reader() @@ -193,22 +210,26 @@ class Head(Reference): return RemoteReference(self.repo, remote_refpath) # END handle have tracking branch - # we are not a tracking branch + # We are not a tracking branch. return None def rename(self, new_path: PathLike, force: bool = False) -> "Head": - """Rename self to a new path + """Rename self to a new path. :param new_path: - Either a simple name or a path, i.e. new_name or features/new_name. - The prefix refs/heads is implied + Either a simple name or a path, e.g. ``new_name`` or ``features/new_name``. + The prefix ``refs/heads`` is implied. 
:param force: - If True, the rename will succeed even if a head with the target name + If ``True``, the rename will succeed even if a head with the target name already exists. - :return: self - :note: respects the ref log as git commands are used""" + :return: + self + + :note: + Respects the ref log, as git commands are used. + """ flag = "-m" if force: flag = "-M" @@ -218,30 +239,32 @@ class Head(Reference): return self def checkout(self, force: bool = False, **kwargs: Any) -> Union["HEAD", "Head"]: - """Checkout this head by setting the HEAD to this reference, by updating the index - to reflect the tree we point to and by updating the working tree to reflect - the latest index. + """Check out this head by setting the HEAD to this reference, by updating the + index to reflect the tree we point to and by updating the working tree to + reflect the latest index. The command will fail if changed working tree files would be overwritten. :param force: - If True, changes to the index and the working tree will be discarded. - If False, GitCommandError will be raised in that situation. + If ``True``, changes to the index and the working tree will be discarded. + If ``False``, :exc:`~git.exc.GitCommandError` will be raised in that + situation. :param kwargs: - Additional keyword arguments to be passed to git checkout, i.e. - b='new_branch' to create a new branch at the given spot. + Additional keyword arguments to be passed to git checkout, e.g. + ``b="new_branch"`` to create a new branch at the given spot. :return: - The active branch after the checkout operation, usually self unless - a new branch has been created. + The active branch after the checkout operation, usually self unless a new + branch has been created. If there is no active branch, as the HEAD is now detached, the HEAD reference will be returned instead. 
:note: - By default it is only allowed to checkout heads - everything else - will leave the HEAD detached which is allowed and possible, but remains - a special state that some tools might not be able to handle.""" + By default it is only allowed to checkout heads - everything else will leave + the HEAD detached which is allowed and possible, but remains a special state + that some tools might not be able to handle. + """ kwargs["f"] = force if kwargs["f"] is False: kwargs.pop("f") @@ -264,14 +287,18 @@ class Head(Reference): def config_reader(self) -> SectionConstraint[GitConfigParser]: """ - :return: A configuration parser instance constrained to only read - this instance's values""" + :return: + A configuration parser instance constrained to only read this instance's + values. + """ return self._config_parser(read_only=True) def config_writer(self) -> SectionConstraint[GitConfigParser]: """ - :return: A configuration writer instance with read-and write access - to options of this head""" + :return: + A configuration writer instance with read-and write access to options of + this head. 
+ """ return self._config_parser(read_only=False) # } END configuration diff --git a/git/refs/log.py b/git/refs/log.py index ef3f86b..17e3a94 100644 --- a/git/refs/log.py +++ b/git/refs/log.py @@ -1,55 +1,57 @@ +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ + +__all__ = ["RefLog", "RefLogEntry"] + from mmap import mmap +import os.path as osp import re import time as _time from git.compat import defenc from git.objects.util import ( - parse_date, Serializable, altz_to_utctz_str, + parse_date, ) from git.util import ( Actor, LockedFD, LockFile, assure_directory_exists, - to_native_path, bin_to_hex, file_contents_ro_filepath, + to_native_path, ) -import os.path as osp - - # typing ------------------------------------------------------------------ -from typing import Iterator, List, Tuple, Union, TYPE_CHECKING +from typing import Iterator, List, Tuple, TYPE_CHECKING, Union from git.types import PathLike if TYPE_CHECKING: - from git.refs import SymbolicReference from io import BytesIO - from git.config import GitConfigParser, SectionConstraint # NOQA -# ------------------------------------------------------------------------------ + from git.config import GitConfigParser, SectionConstraint + from git.refs import SymbolicReference -__all__ = ["RefLog", "RefLogEntry"] +# ------------------------------------------------------------------------------ class RefLogEntry(Tuple[str, str, Actor, Tuple[int, int], str]): + """Named tuple allowing easy access to the revlog data fields.""" - """Named tuple allowing easy access to the revlog data fields""" + _re_hexsha_only = re.compile(r"^[0-9A-Fa-f]{40}$") - _re_hexsha_only = re.compile("^[0-9A-Fa-f]{40}$") __slots__ = () def __repr__(self) -> str: - """Representation of ourselves in git reflog format""" + """Representation of ourselves in git reflog format.""" return self.format() def format(self) -> str: - """:return: a string suitable to be 
placed in a reflog file""" + """:return: A string suitable to be placed in a reflog file.""" act = self.actor time = self.time return "{} {} {} <{}> {!s} {}\t{}\n".format( @@ -64,30 +66,31 @@ class RefLogEntry(Tuple[str, str, Actor, Tuple[int, int], str]): @property def oldhexsha(self) -> str: - """The hexsha to the commit the ref pointed to before the change""" + """The hexsha to the commit the ref pointed to before the change.""" return self[0] @property def newhexsha(self) -> str: - """The hexsha to the commit the ref now points to, after the change""" + """The hexsha to the commit the ref now points to, after the change.""" return self[1] @property def actor(self) -> Actor: - """Actor instance, providing access""" + """Actor instance, providing access.""" return self[2] @property def time(self) -> Tuple[int, int]: - """time as tuple: + """Time as tuple: - * [0] = int(time) - * [1] = int(timezone_offset) in time.altzone format""" + * [0] = ``int(time)`` + * [1] = ``int(timezone_offset)`` in :attr:`time.altzone` format + """ return self[3] @property def message(self) -> str: - """Message describing the operation that acted on the reference""" + """Message describing the operation that acted on the reference.""" return self[4] @classmethod @@ -100,7 +103,7 @@ class RefLogEntry(Tuple[str, str, Actor, Tuple[int, int], str]): tz_offset: int, message: str, ) -> "RefLogEntry": # skipcq: PYL-W0621 - """:return: New instance of a RefLogEntry""" + """:return: New instance of a :class:`RefLogEntry`""" if not isinstance(actor, Actor): raise ValueError("Need actor instance, got %s" % actor) # END check types @@ -108,9 +111,14 @@ class RefLogEntry(Tuple[str, str, Actor, Tuple[int, int], str]): @classmethod def from_line(cls, line: bytes) -> "RefLogEntry": - """:return: New RefLogEntry instance from the given revlog line. 
- :param line: line bytes without trailing newline - :raise ValueError: If line could not be parsed""" + """:return: New :class:`RefLogEntry` instance from the given revlog line. + + :param line: + Line bytes without trailing newline + + :raise ValueError: + If `line` could not be parsed. + """ line_str = line.decode(defenc) fields = line_str.split("\t", 1) if len(fields) == 1: @@ -141,24 +149,24 @@ class RefLogEntry(Tuple[str, str, Actor, Tuple[int, int], str]): class RefLog(List[RefLogEntry], Serializable): + R"""A reflog contains :class:`RefLogEntry`\s, each of which defines a certain state + of the head in question. Custom query methods allow to retrieve log entries by date + or by other criteria. - """A reflog contains RefLogEntrys, each of which defines a certain state - of the head in question. Custom query methods allow to retrieve log entries - by date or by other criteria. - - Reflog entries are ordered, the first added entry is first in the list, the last - entry, i.e. the last change of the head or reference, is last in the list.""" + Reflog entries are ordered. The first added entry is first in the list. The last + entry, i.e. the last change of the head or reference, is last in the list. + """ __slots__ = ("_path",) def __new__(cls, filepath: Union[PathLike, None] = None) -> "RefLog": - inst = super(RefLog, cls).__new__(cls) + inst = super().__new__(cls) return inst - def __init__(self, filepath: Union[PathLike, None] = None): + def __init__(self, filepath: Union[PathLike, None] = None) -> None: """Initialize this instance with an optional filepath, from which we will - initialize our data. The path is also used to write changes back using - the write() method""" + initialize our data. 
The path is also used to write changes back using the + :meth:`write` method.""" self._path = filepath if filepath is not None: self._read_from_file() @@ -168,7 +176,7 @@ class RefLog(List[RefLogEntry], Serializable): try: fmap = file_contents_ro_filepath(self._path, stream=True, allow_mmap=True) except OSError: - # it is possible and allowed that the file doesn't exist ! + # It is possible and allowed that the file doesn't exist! return # END handle invalid log @@ -183,31 +191,44 @@ class RefLog(List[RefLogEntry], Serializable): @classmethod def from_file(cls, filepath: PathLike) -> "RefLog": """ - :return: a new RefLog instance containing all entries from the reflog - at the given filepath - :param filepath: path to reflog - :raise ValueError: If the file could not be read or was corrupted in some way""" + :return: + A new :class:`RefLog` instance containing all entries from the reflog at the + given `filepath`. + + :param filepath: + Path to reflog. + + :raise ValueError: + If the file could not be read or was corrupted in some way. + """ return cls(filepath) @classmethod def path(cls, ref: "SymbolicReference") -> str: """ - :return: string to absolute path at which the reflog of the given ref - instance would be found. The path is not guaranteed to point to a valid - file though. - :param ref: SymbolicReference instance""" + :return: + String to absolute path at which the reflog of the given ref instance would + be found. The path is not guaranteed to point to a valid file though. + + :param ref: + :class:`~git.refs.symbolic.SymbolicReference` instance + """ return osp.join(ref.repo.git_dir, "logs", to_native_path(ref.path)) @classmethod def iter_entries(cls, stream: Union[str, "BytesIO", mmap]) -> Iterator[RefLogEntry]: """ - :return: Iterator yielding RefLogEntry instances, one for each line read - sfrom the given stream. 
- :param stream: file-like object containing the revlog in its native format - or string instance pointing to a file to read""" + :return: + Iterator yielding :class:`RefLogEntry` instances, one for each line read + from the given stream. + + :param stream: + File-like object containing the revlog in its native format or string + instance pointing to a file to read. + """ new_entry = RefLogEntry.from_line if isinstance(stream, str): - # default args return mmap on py>3 + # Default args return mmap since Python 3. _stream = file_contents_ro_filepath(stream) assert isinstance(_stream, mmap) else: @@ -223,23 +244,28 @@ class RefLog(List[RefLogEntry], Serializable): @classmethod def entry_at(cls, filepath: PathLike, index: int) -> "RefLogEntry": """ - :return: RefLogEntry at the given index + :return: + :class:`RefLogEntry` at the given index. - :param filepath: full path to the index file from which to read the entry + :param filepath: + Full path to the index file from which to read the entry. - :param index: python list compatible index, i.e. it may be negative to - specify an entry counted from the end of the list + :param index: + Python list compatible index, i.e. it may be negative to specify an entry + counted from the end of the list. - :raise IndexError: If the entry didn't exist + :raise IndexError: + If the entry didn't exist. - .. note:: This method is faster as it only parses the entry at index, skipping - all other lines. Nonetheless, the whole file has to be read if - the index is negative + :note: + This method is faster as it only parses the entry at index, skipping all + other lines. Nonetheless, the whole file has to be read if the index is + negative. """ with open(filepath, "rb") as fp: if index < 0: return RefLogEntry.from_line(fp.readlines()[index].strip()) - # read until index is reached + # Read until index is reached. 
for i in range(index + 1): line = fp.readline() @@ -254,7 +280,9 @@ class RefLog(List[RefLogEntry], Serializable): def to_file(self, filepath: PathLike) -> None: """Write the contents of the reflog instance to a file at the given filepath. - :param filepath: path to file, parent directories are assumed to exist""" + :param filepath: + Path to file. Parent directories are assumed to exist. + """ lfd = LockedFD(filepath) assure_directory_exists(filepath, is_file=True) @@ -279,19 +307,34 @@ class RefLog(List[RefLogEntry], Serializable): ) -> "RefLogEntry": """Append a new log entry to the revlog at filepath. - :param config_reader: configuration reader of the repository - used to obtain - user information. May also be an Actor instance identifying the committer directly or None. - :param filepath: full path to the log file - :param oldbinsha: binary sha of the previous commit - :param newbinsha: binary sha of the current commit - :param message: message describing the change to the reference - :param write: If True, the changes will be written right away. Otherwise - the change will not be written + :param config_reader: + Configuration reader of the repository - used to obtain user information. + May also be an :class:`~git.util.Actor` instance identifying the committer + directly or ``None``. - :return: RefLogEntry objects which was appended to the log + :param filepath: + Full path to the log file. - :note: As we are append-only, concurrent access is not a problem as we - do not interfere with readers.""" + :param oldbinsha: + Binary sha of the previous commit. + + :param newbinsha: + Binary sha of the current commit. + + :param message: + Message describing the change to the reference. + + :param write: + If ``True``, the changes will be written right away. + Otherwise the change will not be written. + + :return: + :class:`RefLogEntry` objects which was appended to the log. 
+ + :note: + As we are append-only, concurrent access is not a problem as we do not + interfere with readers. + """ if len(oldbinsha) != 20 or len(newbinsha) != 20: raise ValueError("Shas need to be given in binary format") @@ -325,9 +368,11 @@ class RefLog(List[RefLogEntry], Serializable): return entry def write(self) -> "RefLog": - """Write this instance's data to the file we are originating from + """Write this instance's data to the file we are originating from. - :return: self""" + :return: + self + """ if self._path is None: raise ValueError("Instance was not initialized with a path, use to_file(...) instead") # END assert path @@ -337,10 +382,11 @@ class RefLog(List[RefLogEntry], Serializable): # } END interface # { Serializable Interface + def _serialize(self, stream: "BytesIO") -> "RefLog": write = stream.write - # write all entries + # Write all entries. for e in self: write(e.format().encode(defenc)) # END for each entry @@ -348,5 +394,6 @@ class RefLog(List[RefLogEntry], Serializable): def _deserialize(self, stream: "BytesIO") -> "RefLog": self.extend(self.iter_entries(stream)) - # } END serializable interface return self + + # } END serializable interface diff --git a/git/refs/reference.py b/git/refs/reference.py index 4f9e3a0..e5d4737 100644 --- a/git/refs/reference.py +++ b/git/refs/reference.py @@ -1,28 +1,29 @@ -from git.util import ( - LazyMixin, - IterableObj, -) -from .symbolic import SymbolicReference, T_References +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ + +__all__ = ["Reference"] +from git.util import IterableObj, LazyMixin + +from .symbolic import SymbolicReference, T_References # typing ------------------------------------------------------------------ -from typing import Any, Callable, Iterator, Type, Union, TYPE_CHECKING # NOQA -from git.types import Commit_ish, PathLike, _T # NOQA +from typing import Any, Callable, Iterator, TYPE_CHECKING, Type, 
Union + +from git.types import AnyGitObject, PathLike, _T if TYPE_CHECKING: from git.repo import Repo # ------------------------------------------------------------------------------ - -__all__ = ["Reference"] - # { Utilities def require_remote_ref_path(func: Callable[..., _T]) -> Callable[..., _T]: - """A decorator raising a TypeError if we are not a valid remote, based on the path""" + """A decorator raising :exc:`ValueError` if we are not a valid remote, based on the + path.""" def wrapper(self: T_References, *args: Any) -> _T: if not self.is_remote(): @@ -34,32 +35,40 @@ def require_remote_ref_path(func: Callable[..., _T]) -> Callable[..., _T]: return wrapper -# }END utilities +# } END utilities class Reference(SymbolicReference, LazyMixin, IterableObj): + """A named reference to any object. - """Represents a named reference to any object. Subclasses may apply restrictions though, - i.e. Heads can only point to commits.""" + Subclasses may apply restrictions though, e.g., a :class:`~git.refs.head.Head` can + only point to commits. + """ __slots__ = () + _points_to_commits_only = False _resolve_ref_on_create = True _common_path_default = "refs" def __init__(self, repo: "Repo", path: PathLike, check_path: bool = True) -> None: - """Initialize this instance + """Initialize this instance. + + :param repo: + Our parent repository. - :param repo: Our parent repository :param path: - Path relative to the .git/ directory pointing to the ref in question, i.e. - refs/heads/master - :param check_path: if False, you can provide any path. Otherwise the path must start with the - default path prefix of this type.""" + Path relative to the ``.git/`` directory pointing to the ref in question, + e.g. ``refs/heads/master``. + + :param check_path: + If ``False``, you can provide any path. + Otherwise the path must start with the default path prefix of this type. 
+ """ if check_path and not str(path).startswith(self._common_path_default + "/"): raise ValueError(f"Cannot instantiate {self.__class__.__name__!r} from path {path}") - self.path: str # SymbolicReference converts to string atm - super(Reference, self).__init__(repo, path) + self.path: str # SymbolicReference converts to string at the moment. + super().__init__(repo, path) def __str__(self) -> str: return self.name @@ -69,12 +78,14 @@ class Reference(SymbolicReference, LazyMixin, IterableObj): # @ReservedAssignment def set_object( self, - object: Union[Commit_ish, "SymbolicReference", str], + object: Union[AnyGitObject, "SymbolicReference", str], logmsg: Union[str, None] = None, ) -> "Reference": - """Special version which checks if the head-log needs an update as well + """Special version which checks if the head-log needs an update as well. - :return: self""" + :return: + self + """ oldbinsha = None if logmsg is not None: head = self.repo.head @@ -83,33 +94,37 @@ class Reference(SymbolicReference, LazyMixin, IterableObj): # END handle commit retrieval # END handle message is set - super(Reference, self).set_object(object, logmsg) + super().set_object(object, logmsg) if oldbinsha is not None: - # /* from refs.c in git-source - # * Special hack: If a branch is updated directly and HEAD - # * points to it (may happen on the remote side of a push - # * for example) then logically the HEAD reflog should be - # * updated too. - # * A generic solution implies reverse symref information, - # * but finding all symrefs pointing to the given branch - # * would be rather costly for this rare event (the direct - # * update of a branch) to be worth it. So let's cheat and - # * check with HEAD only which should cover 99% of all usage - # * scenarios (even 100% of the default ones). 
- # */ + # From refs/files-backend.c in git-source: + # /* + # * Special hack: If a branch is updated directly and HEAD + # * points to it (may happen on the remote side of a push + # * for example) then logically the HEAD reflog should be + # * updated too. + # * A generic solution implies reverse symref information, + # * but finding all symrefs pointing to the given branch + # * would be rather costly for this rare event (the direct + # * update of a branch) to be worth it. So let's cheat and + # * check with HEAD only which should cover 99% of all usage + # * scenarios (even 100% of the default ones). + # */ self.repo.head.log_append(oldbinsha, logmsg) # END check if the head return self - # NOTE: Don't have to overwrite properties as the will only work without a the log + # NOTE: No need to overwrite properties, as the will only work without a the log. @property def name(self) -> str: - """:return: (shortest) Name of this reference - it may contain path components""" - # first two path tokens are can be removed as they are - # refs/heads or refs/tags or refs/remotes + """ + :return: + (shortest) Name of this reference - it may contain path components + """ + # The first two path tokens can be removed as they are + # refs/heads or refs/tags or refs/remotes. 
tokens = self.path.split("/") if len(tokens) < 3: return self.path # could be refs/HEAD @@ -123,31 +138,38 @@ class Reference(SymbolicReference, LazyMixin, IterableObj): *args: Any, **kwargs: Any, ) -> Iterator[T_References]: - """Equivalent to SymbolicReference.iter_items, but will return non-detached - references as well.""" + """Equivalent to + :meth:`SymbolicReference.iter_items <git.refs.symbolic.SymbolicReference.iter_items>`, + but will return non-detached references as well.""" return cls._iter_items(repo, common_path) - # }END interface + # } END interface # { Remote Interface - @property # type: ignore ## mypy cannot deal with properties with an extra decorator (2021-04-21) + @property @require_remote_ref_path def remote_name(self) -> str: """ :return: - Name of the remote we are a reference of, such as 'origin' for a reference - named 'origin/master'""" + Name of the remote we are a reference of, such as ``origin`` for a reference + named ``origin/master``. + """ tokens = self.path.split("/") # /refs/remotes/<remote name>/<branch_name> return tokens[2] - @property # type: ignore ## mypy cannot deal with properties with an extra decorator (2021-04-21) + @property @require_remote_ref_path def remote_head(self) -> str: - """:return: Name of the remote head itself, i.e. master. - :note: The returned name is usually not qualified enough to uniquely identify - a branch""" + """ + :return: + Name of the remote head itself, e.g. ``master``. + + :note: + The returned name is usually not qualified enough to uniquely identify a + branch. 
+ """ tokens = self.path.split("/") return "/".join(tokens[3:]) diff --git a/git/refs/remote.py b/git/refs/remote.py index ec10c5a..b4f4f7b 100644 --- a/git/refs/remote.py +++ b/git/refs/remote.py @@ -1,28 +1,31 @@ +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ + +"""Module implementing a remote object allowing easy access to git remotes.""" + +__all__ = ["RemoteReference"] + import os from git.util import join_path from .head import Head - -__all__ = ["RemoteReference"] - # typing ------------------------------------------------------------------ -from typing import Any, Iterator, NoReturn, Union, TYPE_CHECKING -from git.types import PathLike +from typing import Any, Iterator, NoReturn, TYPE_CHECKING, Union +from git.types import PathLike if TYPE_CHECKING: + from git.remote import Remote from git.repo import Repo - from git import Remote # ------------------------------------------------------------------------------ class RemoteReference(Head): - - """Represents a reference pointing to a remote head.""" + """A reference pointing to a remote head.""" _common_path_default = Head._remote_common_path_default @@ -35,29 +38,30 @@ class RemoteReference(Head): *args: Any, **kwargs: Any, ) -> Iterator["RemoteReference"]: - """Iterate remote references, and if given, constrain them to the given remote""" + """Iterate remote references, and if given, constrain them to the given remote.""" common_path = common_path or cls._common_path_default if remote is not None: common_path = join_path(common_path, str(remote)) # END handle remote constraint # super is Reference - return super(RemoteReference, cls).iter_items(repo, common_path) + return super().iter_items(repo, common_path) - # The Head implementation of delete also accepts strs, but this - # implementation does not. 
mypy doesn't have a way of representing - # tightening the types of arguments in subclasses and recommends Any or - # "type: ignore". (See https://github.com/python/typing/issues/241) + # The Head implementation of delete also accepts strs, but this implementation does + # not. mypy doesn't have a way of representing tightening the types of arguments in + # subclasses and recommends Any or "type: ignore". + # (See: https://github.com/python/typing/issues/241) @classmethod - def delete(cls, repo: "Repo", *refs: "RemoteReference", **kwargs: Any) -> None: # type: ignore - """Delete the given remote references + def delete(cls, repo: "Repo", *refs: "RemoteReference", **kwargs: Any) -> None: # type: ignore[override] + """Delete the given remote references. :note: - kwargs are given for comparability with the base class method as we - should not narrow the signature.""" + `kwargs` are given for comparability with the base class method as we + should not narrow the signature. + """ repo.git.branch("-d", "-r", *refs) - # the official deletion method will ignore remote symbolic refs - these - # are generally ignored in the refs/ folder. We don't though - # and delete remainders manually + # The official deletion method will ignore remote symbolic refs - these are + # generally ignored in the refs/ folder. We don't though and delete remainders + # manually. for ref in refs: try: os.remove(os.path.join(repo.common_dir, ref.path)) @@ -71,5 +75,5 @@ class RemoteReference(Head): @classmethod def create(cls, *args: Any, **kwargs: Any) -> NoReturn: - """Used to disable this method""" + """Raise :exc:`TypeError`. 
Defined so the ``create`` method is disabled.""" raise TypeError("Cannot explicitly create remote references") diff --git a/git/refs/symbolic.py b/git/refs/symbolic.py index 5491604..510850b 100644 --- a/git/refs/symbolic.py +++ b/git/refs/symbolic.py @@ -1,20 +1,24 @@ -from git.types import PathLike +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ + +__all__ = ["SymbolicReference"] + import os +from gitdb.exc import BadName, BadObject + from git.compat import defenc -from git.objects import Object +from git.objects.base import Object from git.objects.commit import Commit +from git.refs.log import RefLog from git.util import ( + LockedFD, + assure_directory_exists, + hex_to_bin, join_path, join_path_native, to_native_path_linux, - assure_directory_exists, - hex_to_bin, - LockedFD, ) -from gitdb.exc import BadObject, BadName - -from .log import RefLog # typing ------------------------------------------------------------------ @@ -22,21 +26,22 @@ from typing import ( Any, Iterator, List, + TYPE_CHECKING, Tuple, Type, TypeVar, Union, - TYPE_CHECKING, cast, -) # NOQA -from git.types import Commit_ish, PathLike # NOQA +) + +from git.types import AnyGitObject, PathLike if TYPE_CHECKING: - from git.repo import Repo - from git.refs import Head, TagReference, RemoteReference, Reference - from .log import RefLogEntry from git.config import GitConfigParser from git.objects.commit import Actor + from git.refs import Head, TagReference, RemoteReference, Reference + from git.refs.log import RefLogEntry + from git.repo import Repo T_References = TypeVar("T_References", bound="SymbolicReference") @@ -44,33 +49,32 @@ T_References = TypeVar("T_References", bound="SymbolicReference") # ------------------------------------------------------------------------------ -__all__ = ["SymbolicReference"] - - def _git_dir(repo: "Repo", path: Union[PathLike, None]) -> PathLike: - """Find the git dir that's 
appropriate for the path""" + """Find the git dir that is appropriate for the path.""" name = f"{path}" if name in ["HEAD", "ORIG_HEAD", "FETCH_HEAD", "index", "logs"]: return repo.git_dir return repo.common_dir -class SymbolicReference(object): +class SymbolicReference: + """Special case of a reference that is symbolic. - """Represents a special case of a reference such that this reference is symbolic. - It does not point to a specific commit, but to another Head, which itself - specifies a commit. + This does not point to a specific commit, but to another + :class:`~git.refs.head.Head`, which itself specifies a commit. - A typical example for a symbolic reference is HEAD.""" + A typical example for a symbolic reference is :class:`~git.refs.head.HEAD`. + """ __slots__ = ("repo", "path") + _resolve_ref_on_create = False _points_to_commits_only = True _common_path_default = "" _remote_common_path_default = "refs/remotes" _id_attribute_ = "name" - def __init__(self, repo: "Repo", path: PathLike, check_path: bool = False): + def __init__(self, repo: "Repo", path: PathLike, check_path: bool = False) -> None: self.repo = repo self.path = path @@ -96,8 +100,9 @@ class SymbolicReference(object): def name(self) -> str: """ :return: - In case of symbolic references, the shortest assumable name - is the path itself.""" + In case of symbolic references, the shortest assumable name is the path + itself. + """ return str(self.path) @property @@ -110,8 +115,12 @@ class SymbolicReference(object): @classmethod def _iter_packed_refs(cls, repo: "Repo") -> Iterator[Tuple[str, str]]: - """Returns an iterator yielding pairs of sha1/path pairs (as strings) for the corresponding refs. - :note: The packed refs file will be kept open as long as we iterate""" + """Return an iterator yielding pairs of sha1/path pairs (as strings) for the + corresponding refs. + + :note: + The packed refs file will be kept open as long as we iterate. 
+ """ try: with open(cls._get_packed_refs_path(repo), "rt", encoding="UTF-8") as fp: for line in fp: @@ -133,8 +142,8 @@ class SymbolicReference(object): continue # END parse comment - # skip dereferenced tag object entries - previous line was actual - # tag reference for it + # Skip dereferenced tag object entries - previous line was actual + # tag reference for it. if line[0] == "^": continue @@ -143,17 +152,17 @@ class SymbolicReference(object): except OSError: return None # END no packed-refs file handling - # NOTE: Had try-finally block around here to close the fp, - # but some python version wouldn't allow yields within that. - # I believe files are closing themselves on destruction, so it is - # alright. @classmethod def dereference_recursive(cls, repo: "Repo", ref_path: Union[PathLike, None]) -> str: """ - :return: hexsha stored in the reference at the given ref_path, recursively dereferencing all - intermediate references as required - :param repo: the repository containing the reference at ref_path""" + :return: + hexsha stored in the reference at the given `ref_path`, recursively + dereferencing all intermediate references as required + + :param repo: + The repository containing the reference at `ref_path`. + """ while True: hexsha, ref_path = cls._get_ref_info(repo, ref_path) @@ -163,7 +172,10 @@ class SymbolicReference(object): @staticmethod def _check_ref_name_valid(ref_path: PathLike) -> None: - # Based on the rules described in https://git-scm.com/docs/git-check-ref-format/#_description + """Check a ref name for validity. + + This is based on the rules described in :manpage:`git-check-ref-format(1)`. 
+ """ previous: Union[str, None] = None one_before_previous: Union[str, None] = None for c in str(ref_path): @@ -203,16 +215,20 @@ class SymbolicReference(object): elif any(component.endswith(".lock") for component in str(ref_path).split("/")): raise ValueError( f"Invalid reference '{ref_path}': references cannot have slash-separated components that end with" - f" '.lock'" + " '.lock'" ) @classmethod def _get_ref_info_helper( cls, repo: "Repo", ref_path: Union[PathLike, None] ) -> Union[Tuple[str, None], Tuple[None, str]]: - """Return: (str(sha), str(target_ref_path)) if available, the sha the file at - rela_path points to, or None. target_ref_path is the reference we - point to, or None""" + """ + :return: + *(str(sha), str(target_ref_path))*, where: + + * *sha* is of the file at rela_path points to if available, or ``None``. + * *target_ref_path* is the reference we point to, or ``None``. + """ if ref_path: cls._check_ref_name_valid(ref_path) @@ -221,18 +237,18 @@ class SymbolicReference(object): try: with open(os.path.join(repodir, str(ref_path)), "rt", encoding="UTF-8") as fp: value = fp.read().rstrip() - # Don't only split on spaces, but on whitespace, which allows to parse lines like + # Don't only split on spaces, but on whitespace, which allows to parse lines like: # 60b64ef992065e2600bfef6187a97f92398a9144 branch 'master' of git-server:/path/to/repo tokens = value.split() assert len(tokens) != 0 except OSError: - # Probably we are just packed, find our entry in the packed refs file + # Probably we are just packed. Find our entry in the packed refs file. # NOTE: We are not a symbolic ref if we are in a packed file, as these - # are excluded explicitly + # are excluded explicitly. for sha, path in cls._iter_packed_refs(repo): if path != ref_path: continue - # sha will be used + # sha will be used. 
tokens = sha, path break # END for each packed ref @@ -240,11 +256,11 @@ class SymbolicReference(object): if tokens is None: raise ValueError("Reference at %r does not exist" % ref_path) - # is it a reference ? + # Is it a reference? if tokens[0] == "ref:": return (None, tokens[1]) - # its a commit + # It's a commit. if repo.re_hexsha_only.match(tokens[0]): return (tokens[0], None) @@ -252,25 +268,32 @@ class SymbolicReference(object): @classmethod def _get_ref_info(cls, repo: "Repo", ref_path: Union[PathLike, None]) -> Union[Tuple[str, None], Tuple[None, str]]: - """Return: (str(sha), str(target_ref_path)) if available, the sha the file at - rela_path points to, or None. target_ref_path is the reference we - point to, or None""" + """ + :return: + *(str(sha), str(target_ref_path))*, where: + + * *sha* is of the file at rela_path points to if available, or ``None``. + * *target_ref_path* is the reference we point to, or ``None``. + """ return cls._get_ref_info_helper(repo, ref_path) - def _get_object(self) -> Commit_ish: + def _get_object(self) -> AnyGitObject: """ :return: - The object our ref currently refers to. Refs can be cached, they will - always point to the actual object as it gets re-created on each query""" - # have to be dynamic here as we may be a tag which can point to anything - # Our path will be resolved to the hexsha which will be used accordingly + The object our ref currently refers to. Refs can be cached, they will always + point to the actual object as it gets re-created on each query. + """ + # We have to be dynamic here as we may be a tag which can point to anything. + # Our path will be resolved to the hexsha which will be used accordingly. return Object.new_from_sha(self.repo, hex_to_bin(self.dereference_recursive(self.repo, self.path))) def _get_commit(self) -> "Commit": """ :return: - Commit object we point to, works for detached and non-detached - SymbolicReferences. 
The symbolic reference will be dereferenced recursively.""" + :class:`~git.objects.commit.Commit` object we point to. This works for + detached and non-detached :class:`SymbolicReference` instances. The symbolic + reference will be dereferenced recursively. + """ obj = self._get_object() if obj.type == "tag": obj = obj.object @@ -286,12 +309,17 @@ class SymbolicReference(object): commit: Union[Commit, "SymbolicReference", str], logmsg: Union[str, None] = None, ) -> "SymbolicReference": - """As set_object, but restricts the type of object to be a Commit + """Like :meth:`set_object`, but restricts the type of object to be a + :class:`~git.objects.commit.Commit`. + + :raise ValueError: + If `commit` is not a :class:`~git.objects.commit.Commit` object, nor does it + point to a commit. - :raise ValueError: If commit is not a Commit object or doesn't point to - a commit - :return: self""" - # check the type - assume the best if it is a base-string + :return: + self + """ + # Check the type - assume the best if it is a base-string. invalid_type = False if isinstance(commit, Object): invalid_type = commit.type != Commit.type @@ -309,25 +337,39 @@ class SymbolicReference(object): raise ValueError("Need commit, got %r" % commit) # END handle raise - # we leave strings to the rev-parse method below + # We leave strings to the rev-parse method below. self.set_object(commit, logmsg) return self def set_object( self, - object: Union[Commit_ish, "SymbolicReference", str], + object: Union[AnyGitObject, "SymbolicReference", str], logmsg: Union[str, None] = None, ) -> "SymbolicReference": - """Set the object we point to, possibly dereference our symbolic reference first. - If the reference does not exist, it will be created - - :param object: a refspec, a SymbolicReference or an Object instance. SymbolicReferences - will be dereferenced beforehand to obtain the object they point to - :param logmsg: If not None, the message will be used in the reflog entry to be - written. 
Otherwise the reflog is not altered - :note: plain SymbolicReferences may not actually point to objects by convention - :return: self""" + """Set the object we point to, possibly dereference our symbolic reference + first. If the reference does not exist, it will be created. + + :param object: + A refspec, a :class:`SymbolicReference` or an + :class:`~git.objects.base.Object` instance. + + * :class:`SymbolicReference` instances will be dereferenced beforehand to + obtain the git object they point to. + * :class:`~git.objects.base.Object` instances must represent git objects + (:class:`~git.types.AnyGitObject`). + + :param logmsg: + If not ``None``, the message will be used in the reflog entry to be written. + Otherwise the reflog is not altered. + + :note: + Plain :class:`SymbolicReference` instances may not actually point to objects + by convention. + + :return: + self + """ if isinstance(object, SymbolicReference): object = object.object # @ReservedAssignment # END resolve references @@ -345,13 +387,27 @@ class SymbolicReference(object): # set the commit on our reference return self._get_reference().set_object(object, logmsg) - commit = property(_get_commit, set_commit, doc="Query or set commits directly") # type: ignore - object = property(_get_object, set_object, doc="Return the object our ref currently refers to") # type: ignore + commit = property( + _get_commit, + set_commit, # type: ignore[arg-type] + doc="Query or set commits directly", + ) + + object = property( + _get_object, + set_object, # type: ignore[arg-type] + doc="Return the object our ref currently refers to", + ) def _get_reference(self) -> "SymbolicReference": - """:return: Reference Object we point to - :raise TypeError: If this symbolic reference is detached, hence it doesn't point - to a reference, but to a commit""" + """ + :return: + :class:`~git.refs.reference.Reference` object we point to + + :raise TypeError: + If this symbolic reference is detached, hence it doesn't point to a + 
reference, but to a commit. + """ sha, target_ref_path = self._get_ref_info(self.repo, self.path) if target_ref_path is None: raise TypeError("%s is a detached symbolic reference as it points to %r" % (self, sha)) @@ -359,26 +415,37 @@ class SymbolicReference(object): def set_reference( self, - ref: Union[Commit_ish, "SymbolicReference", str], + ref: Union[AnyGitObject, "SymbolicReference", str], logmsg: Union[str, None] = None, ) -> "SymbolicReference": - """Set ourselves to the given ref. It will stay a symbol if the ref is a Reference. - Otherwise an Object, given as Object instance or refspec, is assumed and if valid, - will be set which effectively detaches the reference if it was a purely - symbolic one. - - :param ref: SymbolicReference instance, Object instance or refspec string - Only if the ref is a SymbolicRef instance, we will point to it. Everything - else is dereferenced to obtain the actual object. - :param logmsg: If set to a string, the message will be used in the reflog. + """Set ourselves to the given `ref`. + + It will stay a symbol if the `ref` is a :class:`~git.refs.reference.Reference`. + + Otherwise a git object, specified as a :class:`~git.objects.base.Object` + instance or refspec, is assumed. If it is valid, this reference will be set to + it, which effectively detaches the reference if it was a purely symbolic one. + + :param ref: + A :class:`SymbolicReference` instance, an :class:`~git.objects.base.Object` + instance (specifically an :class:`~git.types.AnyGitObject`), or a refspec + string. Only if the ref is a :class:`SymbolicReference` instance, we will + point to it. Everything else is dereferenced to obtain the actual object. + + :param logmsg: + If set to a string, the message will be used in the reflog. Otherwise, a reflog entry is not written for the changed reference. The previous commit of the entry will be the commit we point to now. 
- See also: log_append() + See also: :meth:`log_append` - :return: self - :note: This symbolic reference will not be dereferenced. For that, see - ``set_object(...)``""" + :return: + self + + :note: + This symbolic reference will not be dereferenced. For that, see + :meth:`set_object`. + """ write_value = None obj = None if isinstance(ref, SymbolicReference): @@ -388,7 +455,7 @@ class SymbolicReference(object): write_value = ref.hexsha elif isinstance(ref, str): try: - obj = self.repo.rev_parse(ref + "^{}") # optionally deref tags + obj = self.repo.rev_parse(ref + "^{}") # Optionally dereference tags. write_value = obj.hexsha except (BadObject, BadName) as e: raise ValueError("Could not extract object from %s" % ref) from e @@ -428,18 +495,23 @@ class SymbolicReference(object): return self - # aliased reference + # Aliased reference reference: Union["Head", "TagReference", "RemoteReference", "Reference"] - reference = property(_get_reference, set_reference, doc="Returns the Reference we point to") # type: ignore + reference = property( # type: ignore[assignment] + _get_reference, + set_reference, # type: ignore[arg-type] + doc="Returns the Reference we point to", + ) ref = reference def is_valid(self) -> bool: """ :return: - True if the reference is valid, hence it can be read and points to - a valid object or reference.""" + ``True`` if the reference is valid, hence it can be read and points to a + valid object or reference. + """ try: - self.object + self.object # noqa: B018 except (OSError, ValueError): return False else: @@ -449,21 +521,25 @@ class SymbolicReference(object): def is_detached(self) -> bool: """ :return: - True if we are a detached reference, hence we point to a specific commit - instead to another reference""" + ``True`` if we are a detached reference, hence we point to a specific commit + instead to another reference. 
+ """ try: - self.ref + self.ref # noqa: B018 return False except TypeError: return True def log(self) -> "RefLog": """ - :return: RefLog for this reference. Its last entry reflects the latest change - applied to this reference + :return: + :class:`~git.refs.log.RefLog` for this reference. + Its last entry reflects the latest change applied to this reference. - .. note:: As the log is parsed every time, its recommended to cache it for use - instead of calling this method repeatedly. It should be considered read-only.""" + :note: + As the log is parsed every time, its recommended to cache it for use instead + of calling this method repeatedly. It should be considered read-only. + """ return RefLog.from_file(RefLog.path(self)) def log_append( @@ -472,21 +548,28 @@ class SymbolicReference(object): message: Union[str, None], newbinsha: Union[bytes, None] = None, ) -> "RefLogEntry": - """Append a logentry to the logfile of this ref - - :param oldbinsha: binary sha this ref used to point to - :param message: A message describing the change - :param newbinsha: The sha the ref points to now. If None, our current commit sha - will be used - :return: added RefLogEntry instance""" - # NOTE: we use the committer of the currently active commit - this should be + """Append a logentry to the logfile of this ref. + + :param oldbinsha: + Binary sha this ref used to point to. + + :param message: + A message describing the change. + + :param newbinsha: + The sha the ref points to now. If None, our current commit sha will be used. + + :return: + The added :class:`~git.refs.log.RefLogEntry` instance. + """ + # NOTE: We use the committer of the currently active commit - this should be # correct to allow overriding the committer on a per-commit level. - # See https://github.com/gitpython-developers/GitPython/pull/146 + # See https://github.com/gitpython-developers/GitPython/pull/146. 
try: committer_or_reader: Union["Actor", "GitConfigParser"] = self.commit.committer except ValueError: committer_or_reader = self.repo.config_reader() - # end handle newly cloned repositories + # END handle newly cloned repositories if newbinsha is None: newbinsha = self.commit.binsha @@ -496,19 +579,28 @@ class SymbolicReference(object): return RefLog.append_entry(committer_or_reader, RefLog.path(self), oldbinsha, newbinsha, message) def log_entry(self, index: int) -> "RefLogEntry": - """:return: RefLogEntry at the given index - :param index: python list compatible positive or negative index + """ + :return: + :class:`~git.refs.log.RefLogEntry` at the given index - .. note:: This method must read part of the reflog during execution, hence - it should be used sparringly, or only if you need just one index. - In that case, it will be faster than the ``log()`` method""" + :param index: + Python list compatible positive or negative index. + + :note: + This method must read part of the reflog during execution, hence it should + be used sparingly, or only if you need just one index. In that case, it will + be faster than the :meth:`log` method. + """ return RefLog.entry_at(RefLog.path(self), index) @classmethod def to_full_path(cls, path: Union[PathLike, "SymbolicReference"]) -> PathLike: """ - :return: string with a full repository-relative path which can be used to initialize - a Reference instance, for instance by using ``Reference.from_path``""" + :return: + String with a full repository-relative path which can be used to initialize + a :class:`~git.refs.reference.Reference` instance, for instance by using + :meth:`Reference.from_path <git.refs.reference.Reference.from_path>`. 
+ """ if isinstance(path, SymbolicReference): path = path.path full_ref_path = path @@ -520,21 +612,22 @@ class SymbolicReference(object): @classmethod def delete(cls, repo: "Repo", path: PathLike) -> None: - """Delete the reference at the given path + """Delete the reference at the given path. :param repo: - Repository to delete the reference from + Repository to delete the reference from. :param path: - Short or full path pointing to the reference, i.e. refs/myreference - or just "myreference", hence 'refs/' is implied. - Alternatively the symbolic reference to be deleted""" + Short or full path pointing to the reference, e.g. ``refs/myreference`` or + just ``myreference``, hence ``refs/`` is implied. + Alternatively the symbolic reference to be deleted. + """ full_ref_path = cls.to_full_path(path) abs_path = os.path.join(repo.common_dir, full_ref_path) if os.path.exists(abs_path): os.remove(abs_path) else: - # check packed refs + # Check packed refs. pack_file_path = cls._get_packed_refs_path(repo) try: with open(pack_file_path, "rb") as reader: @@ -545,10 +638,10 @@ class SymbolicReference(object): line = line_bytes.decode(defenc) _, _, line_ref = line.partition(" ") line_ref = line_ref.strip() - # keep line if it is a comment or if the ref to delete is not - # in the line - # If we deleted the last line and this one is a tag-reference object, - # we drop it as well + # Keep line if it is a comment or if the ref to delete is not in + # the line. + # If we deleted the last line and this one is a tag-reference + # object, we drop it as well. if (line.startswith("#") or full_ref_path != line_ref) and ( not dropped_last_line or dropped_last_line and not line.startswith("^") ): @@ -557,21 +650,21 @@ class SymbolicReference(object): continue # END skip comments and lines without our path - # drop this line + # Drop this line. made_change = True dropped_last_line = True - # write the new lines + # Write the new lines. 
if made_change: - # write-binary is required, otherwise windows will - # open the file in text mode and change LF to CRLF ! + # Binary writing is required, otherwise Windows will open the file + # in text mode and change LF to CRLF! with open(pack_file_path, "wb") as fd: fd.writelines(line.encode(defenc) for line in new_lines) except OSError: - pass # it didn't exist at all + pass # It didn't exist at all. - # delete the reflog + # Delete the reflog. reflog_path = RefLog.path(cls(repo, full_ref_path)) if os.path.isfile(reflog_path): os.remove(reflog_path) @@ -587,16 +680,17 @@ class SymbolicReference(object): force: bool, logmsg: Union[str, None] = None, ) -> T_References: - """internal method used to create a new symbolic reference. - If resolve is False, the reference will be taken as is, creating - a proper symbolic reference. Otherwise it will be resolved to the - corresponding object and a detached symbolic reference will be created - instead""" + """Internal method used to create a new symbolic reference. + + If `resolve` is ``False``, the reference will be taken as is, creating a proper + symbolic reference. Otherwise it will be resolved to the corresponding object + and a detached symbolic reference will be created instead. + """ git_dir = _git_dir(repo, path) full_ref_path = cls.to_full_path(path) abs_ref_path = os.path.join(git_dir, full_ref_path) - # figure out target data + # Figure out target data. target = reference if resolve: target = repo.rev_parse(str(reference)) @@ -630,50 +724,58 @@ class SymbolicReference(object): force: bool = False, **kwargs: Any, ) -> T_References: - """Create a new symbolic reference, hence a reference pointing , to another reference. + """Create a new symbolic reference: a reference pointing to another reference. :param repo: - Repository to create the reference in + Repository to create the reference in. :param path: - full path at which the new symbolic reference is supposed to be - created at, i.e. 
"NEW_HEAD" or "symrefs/my_new_symref" + Full path at which the new symbolic reference is supposed to be created at, + e.g. ``NEW_HEAD`` or ``symrefs/my_new_symref``. :param reference: - The reference to which the new symbolic reference should point to. - If it is a commit'ish, the symbolic ref will be detached. + The reference which the new symbolic reference should point to. + If it is a commit-ish, the symbolic ref will be detached. :param force: - if True, force creation even if a symbolic reference with that name already exists. - Raise OSError otherwise + If ``True``, force creation even if a symbolic reference with that name + already exists. Raise :exc:`OSError` otherwise. :param logmsg: - If not None, the message to append to the reflog. Otherwise no reflog - entry is written. + If not ``None``, the message to append to the reflog. + If ``None``, no reflog entry is written. - :return: Newly created symbolic Reference + :return: + Newly created symbolic reference :raise OSError: - If a (Symbolic)Reference with the same name but different contents - already exists. + If a (Symbolic)Reference with the same name but different contents already + exists. - :note: This does not alter the current HEAD, index or Working Tree""" + :note: + This does not alter the current HEAD, index or working tree. + """ return cls._create(repo, path, cls._resolve_ref_on_create, reference, force, logmsg) def rename(self, new_path: PathLike, force: bool = False) -> "SymbolicReference": - """Rename self to a new path + """Rename self to a new path. :param new_path: - Either a simple name or a full path, i.e. new_name or features/new_name. - The prefix refs/ is implied for references and will be set as needed. - In case this is a symbolic ref, there is no implied prefix + Either a simple name or a full path, e.g. ``new_name`` or + ``features/new_name``. + The prefix ``refs/`` is implied for references and will be set as needed. 
+ In case this is a symbolic ref, there is no implied prefix. :param force: - If True, the rename will succeed even if a head with the target name - already exists. It will be overwritten in that case + If ``True``, the rename will succeed even if a head with the target name + already exists. It will be overwritten in that case. - :return: self - :raise OSError: In case a file at path but a different contents already exists""" + :return: + self + + :raise OSError: + If a file at path but with different contents already exists. + """ new_path = self.to_full_path(new_path) if self.path == new_path: return self @@ -682,15 +784,15 @@ class SymbolicReference(object): cur_abs_path = os.path.join(_git_dir(self.repo, self.path), self.path) if os.path.isfile(new_abs_path): if not force: - # if they point to the same file, its not an error + # If they point to the same file, it's not an error. with open(new_abs_path, "rb") as fd1: f1 = fd1.read().strip() with open(cur_abs_path, "rb") as fd2: f2 = fd2.read().strip() if f1 != f2: raise OSError("File at path %r already exists" % new_abs_path) - # else: we could remove ourselves and use the otherone, but - # but clarity we just continue as usual + # else: We could remove ourselves and use the other one, but... + # ...for clarity, we just continue as usual. # END not force handling os.remove(new_abs_path) # END handle existing target file @@ -713,10 +815,10 @@ class SymbolicReference(object): common_path = cls._common_path_default rela_paths = set() - # walk loose refs - # Currently we do not follow links + # Walk loose refs. + # Currently we do not follow links. for root, dirs, files in os.walk(join_path_native(repo.common_dir, common_path)): - if "refs" not in root.split(os.sep): # skip non-refs subfolders + if "refs" not in root.split(os.sep): # Skip non-refs subfolders. 
refs_id = [d for d in dirs if d == "refs"] if refs_id: dirs[0:] = ["refs"] @@ -730,14 +832,14 @@ class SymbolicReference(object): # END for each file in root directory # END for each directory to walk - # read packed refs + # Read packed refs. for _sha, rela_path in cls._iter_packed_refs(repo): if rela_path.startswith(str(common_path)): rela_paths.add(rela_path) # END relative path matches common path # END packed refs reading - # return paths in sorted order + # Yield paths in sorted order. for path in sorted(rela_paths): try: yield cls.from_path(repo, path) @@ -753,37 +855,48 @@ class SymbolicReference(object): *args: Any, **kwargs: Any, ) -> Iterator[T_References]: - """Find all refs in the repository + """Find all refs in the repository. - :param repo: is the Repo + :param repo: + The :class:`~git.repo.base.Repo`. :param common_path: - Optional keyword argument to the path which is to be shared by all - returned Ref objects. - Defaults to class specific portion if None assuring that only - refs suitable for the actual class are returned. + Optional keyword argument to the path which is to be shared by all returned + Ref objects. + Defaults to class specific portion if ``None``, ensuring that only refs + suitable for the actual class are returned. :return: - git.SymbolicReference[], each of them is guaranteed to be a symbolic - ref which is not detached and pointing to a valid ref + A list of :class:`SymbolicReference`, each guaranteed to be a symbolic ref + which is not detached and pointing to a valid ref. - List is lexicographically sorted - The returned objects represent actual subclasses, such as Head or TagReference""" - return (r for r in cls._iter_items(repo, common_path) if r.__class__ == SymbolicReference or not r.is_detached) + The list is lexicographically sorted. The returned objects are instances of + concrete subclasses, such as :class:`~git.refs.head.Head` or + :class:`~git.refs.tag.TagReference`. 
+ """ + return (r for r in cls._iter_items(repo, common_path) if r.__class__ is SymbolicReference or not r.is_detached) @classmethod def from_path(cls: Type[T_References], repo: "Repo", path: PathLike) -> T_References: - """ - :param path: full .git-directory-relative path name to the Reference to instantiate - :note: use to_full_path() if you only have a partial path of a known Reference Type + """Make a symbolic reference from a path. + + :param path: + Full ``.git``-directory-relative path name to the Reference to instantiate. + + :note: + Use :meth:`to_full_path` if you only have a partial path of a known + Reference type. + :return: - Instance of type Reference, Head, or Tag - depending on the given path""" + Instance of type :class:`~git.refs.reference.Reference`, + :class:`~git.refs.head.Head`, or :class:`~git.refs.tag.Tag`, depending on + the given path. + """ if not path: raise ValueError("Cannot create Reference from %r" % path) - # Names like HEAD are inserted after the refs module is imported - we have an import dependency - # cycle and don't want to import these names in-function + # Names like HEAD are inserted after the refs module is imported - we have an + # import dependency cycle and don't want to import these names in-function. from . 
import HEAD, Head, RemoteReference, TagReference, Reference for ref_type in ( @@ -797,8 +910,8 @@ class SymbolicReference(object): try: instance: T_References instance = ref_type(repo, path) - if instance.__class__ == SymbolicReference and instance.is_detached: - raise ValueError("SymbolRef was detached, we drop it") + if instance.__class__ is SymbolicReference and instance.is_detached: + raise ValueError("SymbolicRef was detached, we drop it") else: return instance diff --git a/git/refs/tag.py b/git/refs/tag.py index d32d91b..1e38663 100644 --- a/git/refs/tag.py +++ b/git/refs/tag.py @@ -1,27 +1,35 @@ -from .reference import Reference +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ + +"""Provides a :class:`~git.refs.reference.Reference`-based type for lightweight tags. + +This defines the :class:`TagReference` class (and its alias :class:`Tag`), which +represents lightweight tags. For annotated tags (which are git objects), see the +:mod:`git.objects.tag` module. +""" __all__ = ["TagReference", "Tag"] +from .reference import Reference + # typing ------------------------------------------------------------------ -from typing import Any, Type, Union, TYPE_CHECKING -from git.types import Commit_ish, PathLike +from typing import Any, TYPE_CHECKING, Type, Union + +from git.types import AnyGitObject, PathLike if TYPE_CHECKING: - from git.repo import Repo - from git.objects import Commit - from git.objects import TagObject + from git.objects import Commit, TagObject from git.refs import SymbolicReference - + from git.repo import Repo # ------------------------------------------------------------------------------ class TagReference(Reference): - - """Class representing a lightweight tag reference which either points to a commit - ,a tag object or any other object. In the latter case additional information, - like the signature or the tag-creator, is available. 
+ """A lightweight tag reference which either points to a commit, a tag object or any + other object. In the latter case additional information, like the signature or the + tag-creator, is available. This tag object will always point to a commit object, but may carry additional information in a tag object:: @@ -29,9 +37,11 @@ class TagReference(Reference): tagref = TagReference.list_items(repo)[0] print(tagref.commit.message) if tagref.tag is not None: - print(tagref.tag.message)""" + print(tagref.tag.message) + """ __slots__ = () + _common_default = "tags" _common_path_default = Reference._common_path_default + "/" + _common_default @@ -39,11 +49,13 @@ class TagReference(Reference): def commit(self) -> "Commit": # type: ignore[override] # LazyMixin has unrelated commit method """:return: Commit object the tag ref points to - :raise ValueError: if the tag points to a tree or blob""" + :raise ValueError: + If the tag points to a tree or blob. + """ obj = self.object while obj.type != "commit": if obj.type == "tag": - # it is a tag object which carries the commit as an object - we can point to anything + # It is a tag object which carries the commit as an object - we can point to anything. obj = obj.object else: raise ValueError( @@ -58,19 +70,18 @@ class TagReference(Reference): @property def tag(self) -> Union["TagObject", None]: """ - :return: Tag object this tag ref points to or None in case - we are a light weight tag""" + :return: + Tag object this tag ref points to, or ``None`` in case we are a lightweight + tag + """ obj = self.object if obj.type == "tag": return obj return None - # make object read-only - # It should be reasonably hard to adjust an existing tag - - # object = property(Reference._get_object) + # Make object read-only. It should be reasonably hard to adjust an existing tag. 
@property - def object(self) -> Commit_ish: # type: ignore[override] + def object(self) -> AnyGitObject: # type: ignore[override] return Reference._get_object(self) @classmethod @@ -85,31 +96,37 @@ class TagReference(Reference): ) -> "TagReference": """Create a new tag reference. + :param repo: + The :class:`~git.repo.base.Repo` to create the tag in. + :param path: - The name of the tag, i.e. 1.0 or releases/1.0. - The prefix refs/tags is implied + The name of the tag, e.g. ``1.0`` or ``releases/1.0``. + The prefix ``refs/tags`` is implied. - :param ref: - A reference to the Object you want to tag. The Object can be a commit, tree or - blob. + :param reference: + A reference to the :class:`~git.objects.base.Object` you want to tag. + The referenced object can be a commit, tree, or blob. :param logmsg: - If not None, the message will be used in your tag object. This will also - create an additional tag object that allows to obtain that information, i.e.:: + If not ``None``, the message will be used in your tag object. This will also + create an additional tag object that allows to obtain that information, + e.g.:: tagref.tag.message :param message: - Synonym for :param logmsg: - Included for backwards compatibility. :param logmsg is used in preference if both given. + Synonym for the `logmsg` parameter. Included for backwards compatibility. + `logmsg` takes precedence if both are passed. :param force: - If True, to force creation of a tag even though that tag already exists. + If ``True``, force creation of a tag even though that tag already exists. :param kwargs: - Additional keyword arguments to be passed to git-tag + Additional keyword arguments to be passed to :manpage:`git-tag(1)`. - :return: A new TagReference""" + :return: + A new :class:`TagReference`. 
+ """ if "ref" in kwargs and kwargs["ref"]: reference = kwargs["ref"] @@ -130,9 +147,9 @@ class TagReference(Reference): @classmethod def delete(cls, repo: "Repo", *tags: "TagReference") -> None: # type: ignore[override] - """Delete the given existing tag or tags""" + """Delete the given existing tag or tags.""" repo.git.tag("-d", *tags) -# provide an alias +# Provide an alias. Tag = TagReference diff --git a/git/remote.py b/git/remote.py index fc2b2ce..20e42b4 100644 --- a/git/remote.py +++ b/git/remote.py @@ -1,34 +1,30 @@ -# remote.py # Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors # -# This module is part of GitPython and is released under -# the BSD License: https://opensource.org/license/bsd-3-clause/ +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ -# Module implementing a remote object allowing easy access to git remotes +"""Module implementing a remote object allowing easy access to git remotes.""" + +__all__ = ["RemoteProgress", "PushInfo", "FetchInfo", "Remote"] + +import contextlib import logging import re -from git.cmd import handle_process_output, Git +from git.cmd import Git, handle_process_output from git.compat import defenc, force_text +from git.config import GitConfigParser, SectionConstraint, cp from git.exc import GitCommandError +from git.refs import Head, Reference, RemoteReference, SymbolicReference, TagReference from git.util import ( - LazyMixin, - IterableObj, + CallableRemoteProgress, IterableList, + IterableObj, + LazyMixin, RemoteProgress, - CallableRemoteProgress, -) -from git.util import ( join_path, ) -from git.config import ( - GitConfigParser, - SectionConstraint, - cp, -) -from git.refs import Head, Reference, RemoteReference, SymbolicReference, TagReference - # typing------------------------------------------------------- from typing import ( @@ -47,29 +43,18 @@ from typing import ( overload, ) -from git.types import 
PathLike, Literal, Commit_ish +from git.types import AnyGitObject, Literal, PathLike if TYPE_CHECKING: - from git.repo.base import Repo + from git.objects.commit import Commit from git.objects.submodule.base import UpdateProgress - - # from git.objects.commit import Commit - # from git.objects import Blob, Tree, TagObject + from git.repo.base import Repo flagKeyLiteral = Literal[" ", "!", "+", "-", "*", "=", "t", "?"] -# def is_flagKeyLiteral(inp: str) -> TypeGuard[flagKeyLiteral]: -# return inp in [' ', '!', '+', '-', '=', '*', 't', '?'] - - # ------------------------------------------------------------- - -log = logging.getLogger("git.remote") -log.addHandler(logging.NullHandler()) - - -__all__ = ("RemoteProgress", "PushInfo", "FetchInfo", "Remote") +_logger = logging.getLogger(__name__) # { Utilities @@ -79,10 +64,16 @@ def add_progress( git: Git, progress: Union[RemoteProgress, "UpdateProgress", Callable[..., RemoteProgress], None], ) -> Any: - """Add the --progress flag to the given kwargs dict if supported by the - git command. If the actual progress in the given progress instance is not - given, we do not request any progress - :return: possibly altered kwargs""" + """Add the ``--progress`` flag to the given `kwargs` dict if supported by the git + command. + + :note: + If the actual progress in the given progress instance is not given, we do not + request any progress. + + :return: + Possibly altered `kwargs` + """ if progress is not None: v = git.version_info[:2] if v >= (1, 7): @@ -96,39 +87,35 @@ def add_progress( @overload -def to_progress_instance(progress: None) -> RemoteProgress: - ... +def to_progress_instance(progress: None) -> RemoteProgress: ... @overload -def to_progress_instance(progress: Callable[..., Any]) -> CallableRemoteProgress: - ... +def to_progress_instance(progress: Callable[..., Any]) -> CallableRemoteProgress: ... @overload -def to_progress_instance(progress: RemoteProgress) -> RemoteProgress: - ... 
+def to_progress_instance(progress: RemoteProgress) -> RemoteProgress: ... def to_progress_instance( - progress: Union[Callable[..., Any], RemoteProgress, None] + progress: Union[Callable[..., Any], RemoteProgress, None], ) -> Union[RemoteProgress, CallableRemoteProgress]: - """Given the 'progress' return a suitable object derived from - RemoteProgress(). - """ - # new API only needs progress as a function + """Given the `progress` return a suitable object derived from + :class:`~git.util.RemoteProgress`.""" + # New API only needs progress as a function. if callable(progress): return CallableRemoteProgress(progress) - # where None is passed create a parser that eats the progress + # Where None is passed create a parser that eats the progress. elif progress is None: return RemoteProgress() - # assume its the old API with an instance of RemoteProgress. + # Assume its the old API with an instance of RemoteProgress. return progress -class PushInfo(IterableObj, object): +class PushInfo(IterableObj): """ Carries information about the result of a push operation of a single head:: @@ -152,6 +139,7 @@ class PushInfo(IterableObj, object): "_remote", "summary", ) + _id_attribute_ = "pushinfo" ( @@ -187,8 +175,10 @@ class PushInfo(IterableObj, object): old_commit: Optional[str] = None, summary: str = "", ) -> None: - """Initialize a new instance - local_ref: HEAD | Head | RemoteReference | TagReference | Reference | SymbolicReference | None""" + """Initialize a new instance. 
+ + local_ref: HEAD | Head | RemoteReference | TagReference | Reference | SymbolicReference | None + """ self.flags = flags self.local_ref = local_ref self.remote_ref_string = remote_ref_string @@ -197,16 +187,18 @@ class PushInfo(IterableObj, object): self.summary = summary @property - def old_commit(self) -> Union[str, SymbolicReference, Commit_ish, None]: + def old_commit(self) -> Union["Commit", None]: return self._old_commit_sha and self._remote.repo.commit(self._old_commit_sha) or None @property def remote_ref(self) -> Union[RemoteReference, TagReference]: """ :return: - Remote Reference or TagReference in the local repository corresponding - to the remote_ref_string kept in this instance.""" - # translate heads to a local remote, tags stay as they are + Remote :class:`~git.refs.reference.Reference` or + :class:`~git.refs.tag.TagReference` in the local repository corresponding to + the :attr:`remote_ref_string` kept in this instance. + """ + # Translate heads to a local remote. Tags stay as they are. 
if self.remote_ref_string.startswith("refs/tags"): return TagReference(self._remote.repo, self.remote_ref_string) elif self.remote_ref_string.startswith("refs/heads"): @@ -221,12 +213,12 @@ class PushInfo(IterableObj, object): @classmethod def _from_line(cls, remote: "Remote", line: str) -> "PushInfo": - """Create a new PushInfo instance as parsed from line which is expected to be like - refs/heads/master:refs/heads/master 05d2687..1d0568e as bytes""" + """Create a new :class:`PushInfo` instance as parsed from line which is expected + to be like refs/heads/master:refs/heads/master 05d2687..1d0568e as bytes.""" control_character, from_to, summary = line.split("\t", 3) flags = 0 - # control character handling + # Control character handling try: flags |= cls._flag_map[control_character] except KeyError as e: @@ -243,7 +235,7 @@ class PushInfo(IterableObj, object): else: from_ref = Reference.from_path(remote.repo, from_ref_string) - # commit handling, could be message or commit info + # Commit handling, could be message or commit info old_commit: Optional[str] = None if summary.startswith("["): if "[rejected]" in summary: @@ -258,15 +250,15 @@ class PushInfo(IterableObj, object): flags |= cls.NEW_TAG elif "[new branch]" in summary: flags |= cls.NEW_HEAD - # uptodate encoded in control character + # `uptodate` encoded in control character else: - # fast-forward or forced update - was encoded in control character, - # but we parse the old and new commit + # Fast-forward or forced update - was encoded in control character, + # but we parse the old and new commit. split_token = "..." if control_character == " ": split_token = ".." old_sha, _new_sha = summary.split(" ")[0].split(split_token) - # have to use constructor here as the sha usually is abbreviated + # Have to use constructor here as the sha usually is abbreviated. 
old_commit = old_sha # END message handling @@ -278,9 +270,7 @@ class PushInfo(IterableObj, object): class PushInfoList(IterableList[PushInfo]): - """ - IterableList of PushInfo objects. - """ + """:class:`~git.util.IterableList` of :class:`PushInfo` objects.""" def __new__(cls) -> "PushInfoList": return cast(PushInfoList, IterableList.__new__(cls, "push_infos")) @@ -290,15 +280,12 @@ class PushInfoList(IterableList[PushInfo]): self.error: Optional[Exception] = None def raise_if_error(self) -> None: - """ - Raise an exception if any ref failed to push. - """ + """Raise an exception if any ref failed to push.""" if self.error: raise self.error -class FetchInfo(IterableObj, object): - +class FetchInfo(IterableObj): """ Carries information about the results of a fetch operation of a single head:: @@ -315,6 +302,7 @@ class FetchInfo(IterableObj, object): """ __slots__ = ("ref", "old_commit", "flags", "note", "remote_ref_path") + _id_attribute_ = "fetchinfo" ( @@ -328,7 +316,7 @@ class FetchInfo(IterableObj, object): ERROR, ) = [1 << x for x in range(8)] - _re_fetch_result = re.compile(r"^\s*(.) (\[[\w\s\.$@]+\]|[\w\.$@]+)\s+(.+) -> ([^\s]+)( \(.*\)?$)?") + _re_fetch_result = re.compile(r"^ *(?:.{0,3})(.) (\[[\w \.$@]+\]|[\w\.$@]+) +(.+) -> ([^ ]+)( \(.*\)?$)?") _flag_map: Dict[flagKeyLiteral, int] = { "!": ERROR, @@ -341,21 +329,18 @@ class FetchInfo(IterableObj, object): @classmethod def refresh(cls) -> Literal[True]: - """This gets called by the refresh function (see the top level - __init__). + """Update information about which :manpage:`git-fetch(1)` flags are supported + by the git executable being used. + + Called by the :func:`git.refresh` function in the top level ``__init__``. """ - # clear the old values in _flag_map - try: + # Clear the old values in _flag_map. 
+ with contextlib.suppress(KeyError): del cls._flag_map["t"] - except KeyError: - pass - - try: + with contextlib.suppress(KeyError): del cls._flag_map["-"] - except KeyError: - pass - # set the value given the git version + # Set the value given the git version. if Git().version_info[:2] >= (2, 10): cls._flag_map["t"] = cls.TAG_UPDATE else: @@ -368,12 +353,10 @@ class FetchInfo(IterableObj, object): ref: SymbolicReference, flags: int, note: str = "", - old_commit: Union[Commit_ish, None] = None, + old_commit: Union[AnyGitObject, None] = None, remote_ref_path: Optional[PathLike] = None, ) -> None: - """ - Initialize a new instance - """ + """Initialize a new instance.""" self.ref = ref self.flags = flags self.note = note @@ -389,33 +372,37 @@ class FetchInfo(IterableObj, object): return self.ref.name @property - def commit(self) -> Commit_ish: + def commit(self) -> "Commit": """:return: Commit of our remote ref""" return self.ref.commit @classmethod def _from_line(cls, repo: "Repo", line: str, fetch_line: str) -> "FetchInfo": - """Parse information from the given line as returned by git-fetch -v - and return a new FetchInfo object representing this information. - - We can handle a line as follows: - "%c %-\\*s %-\\*s -> %s%s" - - Where c is either ' ', !, +, -, \\*, or = - ! means error - + means success forcing update - - means a tag was updated - * means birth of new branch or tag - = means the head was up to date ( and not moved ) - ' ' means a fast-forward - - fetch line is the corresponding line from FETCH_HEAD, like - acb0fa8b94ef421ad60c8507b634759a472cd56c not-for-merge branch '0.1.7RC' of /tmp/tmpya0vairemote_repo""" + """Parse information from the given line as returned by ``git-fetch -v`` and + return a new :class:`FetchInfo` object representing this information. + + We can handle a line as follows:: + + %c %-*s %-*s -> %s%s + + Where ``c`` is either a space, ``!``, ``+``, ``-``, ``*``, or ``=``: + + - '!' 
means error + - '+' means success forcing update + - '-' means a tag was updated + - '*' means birth of new branch or tag + - '=' means the head was up to date (and not moved) + - ' ' means a fast-forward + + `fetch_line` is the corresponding line from FETCH_HEAD, like:: + + acb0fa8b94ef421ad60c8507b634759a472cd56c not-for-merge branch '0.1.7RC' of /tmp/tmpya0vairemote_repo + """ match = cls._re_fetch_result.match(line) if match is None: raise ValueError("Failed to parse line: %r" % line) - # parse lines + # Parse lines. remote_local_ref_str: str ( control_character, @@ -424,7 +411,6 @@ class FetchInfo(IterableObj, object): remote_local_ref_str, note, ) = match.groups() - # assert is_flagKeyLiteral(control_character), f"{control_character}" control_character = cast(flagKeyLiteral, control_character) try: _new_hex_sha, _fetch_operation, fetch_note = fetch_line.split("\t") @@ -432,7 +418,7 @@ class FetchInfo(IterableObj, object): except ValueError as e: # unpack error raise ValueError("Failed to parse FETCH_HEAD line: %r" % fetch_line) from e - # parse flags from control_character + # Parse flags from control_character. flags = 0 try: flags |= cls._flag_map[control_character] @@ -440,8 +426,9 @@ class FetchInfo(IterableObj, object): raise ValueError("Control character %r unknown as parsed from line %r" % (control_character, line)) from e # END control char exception handling - # parse operation string for more info - makes no sense for symbolic refs, but we parse it anyway - old_commit: Union[Commit_ish, None] = None + # Parse operation string for more info. + # This makes no sense for symbolic refs, but we parse it anyway. 
+ old_commit: Union[AnyGitObject, None] = None is_tag_operation = False if "rejected" in operation: flags |= cls.REJECTED @@ -460,45 +447,49 @@ class FetchInfo(IterableObj, object): old_commit = repo.rev_parse(operation.split(split_token)[0]) # END handle refspec - # handle FETCH_HEAD and figure out ref type + # Handle FETCH_HEAD and figure out ref type. # If we do not specify a target branch like master:refs/remotes/origin/master, # the fetch result is stored in FETCH_HEAD which destroys the rule we usually - # have. In that case we use a symbolic reference which is detached + # have. In that case we use a symbolic reference which is detached. ref_type: Optional[Type[SymbolicReference]] = None if remote_local_ref_str == "FETCH_HEAD": ref_type = SymbolicReference elif ref_type_name == "tag" or is_tag_operation: - # the ref_type_name can be branch, whereas we are still seeing a tag operation. It happens during - # testing, which is based on actual git operations + # The ref_type_name can be branch, whereas we are still seeing a tag + # operation. It happens during testing, which is based on actual git + # operations. ref_type = TagReference elif ref_type_name in ("remote-tracking", "branch"): - # note: remote-tracking is just the first part of the 'remote-tracking branch' token. - # We don't parse it correctly, but its enough to know what to do, and its new in git 1.7something + # Note: remote-tracking is just the first part of the + # 'remote-tracking branch' token. We don't parse it correctly, but it's + # enough to know what to do, and it's new in git 1.7something. 
ref_type = RemoteReference elif "/" in ref_type_name: - # If the fetch spec look something like this '+refs/pull/*:refs/heads/pull/*', and is thus pretty - # much anything the user wants, we will have trouble to determine what's going on - # For now, we assume the local ref is a Head + # If the fetch spec look something like '+refs/pull/*:refs/heads/pull/*', + # and is thus pretty much anything the user wants, we will have trouble + # determining what's going on. For now, we assume the local ref is a Head. ref_type = Head else: raise TypeError("Cannot handle reference type: %r" % ref_type_name) # END handle ref type - # create ref instance + # Create ref instance. if ref_type is SymbolicReference: remote_local_ref = ref_type(repo, "FETCH_HEAD") else: - # determine prefix. Tags are usually pulled into refs/tags, they may have subdirectories. - # It is not clear sometimes where exactly the item is, unless we have an absolute path as indicated - # by the 'ref/' prefix. Otherwise even a tag could be in refs/remotes, which is when it will have the - # 'tags/' subdirectory in its path. - # We don't want to test for actual existence, but try to figure everything out analytically. + # Determine prefix. Tags are usually pulled into refs/tags; they may have + # subdirectories. It is not clear sometimes where exactly the item is, + # unless we have an absolute path as indicated by the 'ref/' prefix. + # Otherwise even a tag could be in refs/remotes, which is when it will have + # the 'tags/' subdirectory in its path. We don't want to test for actual + # existence, but try to figure everything out analytically. ref_path: Optional[PathLike] = None remote_local_ref_str = remote_local_ref_str.strip() if remote_local_ref_str.startswith(Reference._common_path_default + "/"): - # always use actual type if we get absolute paths - # Will always be the case if something is fetched outside of refs/remotes (if its not a tag) + # Always use actual type if we get absolute paths. 
This will always be + # the case if something is fetched outside of refs/remotes (if its not a + # tag). ref_path = remote_local_ref_str if ref_type is not TagReference and not remote_local_ref_str.startswith( RemoteReference._common_path_default + "/" @@ -506,14 +497,14 @@ class FetchInfo(IterableObj, object): ref_type = Reference # END downgrade remote reference elif ref_type is TagReference and "tags/" in remote_local_ref_str: - # even though its a tag, it is located in refs/remotes + # Even though it's a tag, it is located in refs/remotes. ref_path = join_path(RemoteReference._common_path_default, remote_local_ref_str) else: ref_path = join_path(ref_type._common_path_default, remote_local_ref_str) # END obtain refpath - # even though the path could be within the git conventions, we make - # sure we respect whatever the user wanted, and disabled path checking + # Even though the path could be within the git conventions, we make sure we + # respect whatever the user wanted, and disabled path checking. remote_local_ref = ref_type(repo, ref_path, check_path=False) # END create ref instance @@ -527,16 +518,18 @@ class FetchInfo(IterableObj, object): class Remote(LazyMixin, IterableObj): - """Provides easy read and write access to a git remote. Everything not part of this interface is considered an option for the current - remote, allowing constructs like remote.pushurl to query the pushurl. + remote, allowing constructs like ``remote.pushurl`` to query the pushurl. - NOTE: When querying configuration, the configuration accessor will be cached - to speed up subsequent accesses.""" + :note: + When querying configuration, the configuration accessor will be cached to speed + up subsequent accesses. + """ __slots__ = ("repo", "name", "_config_reader") + _id_attribute_ = "name" unsafe_git_fetch_options = [ @@ -556,27 +549,33 @@ class Remote(LazyMixin, IterableObj): "--exec", ] + url: str # Obtained dynamically from _config_reader. See __getattr__ below. 
+ """The URL configured for the remote.""" + def __init__(self, repo: "Repo", name: str) -> None: - """Initialize a remote instance + """Initialize a remote instance. + + :param repo: + The repository we are a remote of. - :param repo: The repository we are a remote of - :param name: the name of the remote, i.e. 'origin'""" + :param name: + The name of the remote, e.g. ``origin``. + """ self.repo = repo self.name = name - self.url: str def __getattr__(self, attr: str) -> Any: - """Allows to call this instance like - remote.special( \\*args, \\*\\*kwargs) to call git-remote special self.name""" + """Allows to call this instance like ``remote.special(*args, **kwargs)`` to + call ``git remote special self.name``.""" if attr == "_config_reader": - return super(Remote, self).__getattr__(attr) + return super().__getattr__(attr) - # sometimes, probably due to a bug in python itself, we are being called - # even though a slot of the same name exists + # Sometimes, probably due to a bug in Python itself, we are being called even + # though a slot of the same name exists. try: return self._config_reader.get(attr) except cp.NoOptionError: - return super(Remote, self).__getattr__(attr) + return super().__getattr__(attr) # END handle exception def _config_section_name(self) -> str: @@ -584,11 +583,14 @@ class Remote(LazyMixin, IterableObj): def _set_cache_(self, attr: str) -> None: if attr == "_config_reader": - # NOTE: This is cached as __getattr__ is overridden to return remote config values implicitly, such as - # in print(r.pushurl) - self._config_reader = SectionConstraint(self.repo.config_reader("repository"), self._config_section_name()) + # NOTE: This is cached as __getattr__ is overridden to return remote config + # values implicitly, such as in print(r.pushurl). 
+ self._config_reader = SectionConstraint( + self.repo.config_reader("repository"), + self._config_section_name(), + ) else: - super(Remote, self)._set_cache_(attr) + super()._set_cache_(attr) def __str__(self) -> str: return self.name @@ -607,21 +609,22 @@ class Remote(LazyMixin, IterableObj): def exists(self) -> bool: """ - :return: True if this is a valid, existing remote. - Valid remotes have an entry in the repository's configuration""" + :return: + ``True`` if this is a valid, existing remote. + Valid remotes have an entry in the repository's configuration. + """ try: self.config_reader.get("url") return True except cp.NoOptionError: - # we have the section at least ... + # We have the section at least... return True except cp.NoSectionError: return False - # end @classmethod def iter_items(cls, repo: "Repo", *args: Any, **kwargs: Any) -> Iterator["Remote"]: - """:return: Iterator yielding Remote objects of the given repository""" + """:return: Iterator yielding :class:`Remote` objects of the given repository""" for section in repo.config_reader("repository").sections(): if not section.startswith("remote "): continue @@ -635,14 +638,21 @@ class Remote(LazyMixin, IterableObj): def set_url( self, new_url: str, old_url: Optional[str] = None, allow_unsafe_protocols: bool = False, **kwargs: Any ) -> "Remote": - """Configure URLs on current remote (cf command git remote set_url) + """Configure URLs on current remote (cf. command ``git remote set-url``). This command manages URLs on the remote. - :param new_url: string being the URL to add as an extra remote URL - :param old_url: when set, replaces this URL with new_url for the remote - :param allow_unsafe_protocols: Allow unsafe protocols to be used, like ext - :return: self + :param new_url: + String being the URL to add as an extra remote URL. + + :param old_url: + When set, replaces this URL with `new_url` for the remote. + + :param allow_unsafe_protocols: + Allow unsafe protocols to be used, like ``ext``. 
+ + :return: + self """ if not allow_unsafe_protocols: Git.check_unsafe_protocols(new_url) @@ -655,25 +665,33 @@ class Remote(LazyMixin, IterableObj): return self def add_url(self, url: str, allow_unsafe_protocols: bool = False, **kwargs: Any) -> "Remote": - """Adds a new url on current remote (special case of git remote set_url) + """Adds a new url on current remote (special case of ``git remote set-url``). This command adds new URLs to a given remote, making it possible to have multiple URLs for a single remote. - :param url: string being the URL to add as an extra remote URL - :param allow_unsafe_protocols: Allow unsafe protocols to be used, like ext - :return: self + :param url: + String being the URL to add as an extra remote URL. + + :param allow_unsafe_protocols: + Allow unsafe protocols to be used, like ``ext``. + + :return: + self """ return self.set_url(url, add=True, allow_unsafe_protocols=allow_unsafe_protocols) def delete_url(self, url: str, **kwargs: Any) -> "Remote": - """Deletes a new url on current remote (special case of git remote set_url) + """Deletes a new url on current remote (special case of ``git remote set-url``). This command deletes new URLs to a given remote, making it possible to have multiple URLs for a single remote. - :param url: string being the URL to delete from the remote - :return: self + :param url: + String being the URL to delete from the remote. + + :return: + self """ return self.set_url(url, delete=True) @@ -700,7 +718,7 @@ class Remote(LazyMixin, IterableObj): yield line.split(": ")[-1] except GitCommandError as _ex: if any(msg in str(_ex) for msg in ["correct access rights", "cannot run ssh"]): - # If ssh is not setup to access this repository, see issue 694 + # If ssh is not setup to access this repository, see issue 694. 
remote_details = self.repo.git.config("--get-all", "remote.%s.url" % self.name) assert isinstance(remote_details, str) for line in remote_details.split("\n"): @@ -714,9 +732,13 @@ class Remote(LazyMixin, IterableObj): def refs(self) -> IterableList[RemoteReference]: """ :return: - IterableList of RemoteReference objects. It is prefixed, allowing - you to omit the remote path portion, i.e.:: - remote.refs.master # yields RemoteReference('/refs/remotes/origin/master')""" + :class:`~git.util.IterableList` of :class:`~git.refs.remote.RemoteReference` + objects. + + It is prefixed, allowing you to omit the remote path portion, e.g.:: + + remote.refs.master # yields RemoteReference('/refs/remotes/origin/master') + """ out_refs: IterableList[RemoteReference] = IterableList(RemoteReference._id_attribute_, "%s/" % self.name) out_refs.extend(RemoteReference.list_items(self.repo, remote=self.name)) return out_refs @@ -725,12 +747,13 @@ class Remote(LazyMixin, IterableObj): def stale_refs(self) -> IterableList[Reference]: """ :return: - IterableList RemoteReference objects that do not have a corresponding - head in the remote reference anymore as they have been deleted on the - remote side, but are still available locally. + :class:`~git.util.IterableList` of :class:`~git.refs.remote.RemoteReference` + objects that do not have a corresponding head in the remote reference + anymore as they have been deleted on the remote side, but are still + available locally. - The IterableList is prefixed, hence the 'origin' must be omitted. See - 'refs' property for an example. + The :class:`~git.util.IterableList` is prefixed, hence the 'origin' must be + omitted. See :attr:`refs` property for an example. 
To make things more complicated, it can be possible for the list to include other kinds of references, for example, tag references, if these are stale @@ -745,27 +768,41 @@ class Remote(LazyMixin, IterableObj): if not line.startswith(token): continue ref_name = line.replace(token, "") - # sometimes, paths start with a full ref name, like refs/tags/foo, see #260 + # Sometimes, paths start with a full ref name, like refs/tags/foo. See #260. if ref_name.startswith(Reference._common_path_default + "/"): out_refs.append(Reference.from_path(self.repo, ref_name)) else: fqhn = "%s/%s" % (RemoteReference._common_path_default, ref_name) out_refs.append(RemoteReference(self.repo, fqhn)) - # end special case handling + # END special case handling # END for each line return out_refs @classmethod def create(cls, repo: "Repo", name: str, url: str, allow_unsafe_protocols: bool = False, **kwargs: Any) -> "Remote": - """Create a new remote to the given repository - - :param repo: Repository instance that is to receive the new remote - :param name: Desired name of the remote - :param url: URL which corresponds to the remote's name - :param allow_unsafe_protocols: Allow unsafe protocols to be used, like ext - :param kwargs: Additional arguments to be passed to the git-remote add command - :return: New Remote instance - :raise GitCommandError: in case an origin with that name already exists""" + """Create a new remote to the given repository. + + :param repo: + Repository instance that is to receive the new remote. + + :param name: + Desired name of the remote. + + :param url: + URL which corresponds to the remote's name. + + :param allow_unsafe_protocols: + Allow unsafe protocols to be used, like ``ext``. + + :param kwargs: + Additional arguments to be passed to the ``git remote add`` command. + + :return: + New :class:`Remote` instance + + :raise git.exc.GitCommandError: + In case an origin with that name already exists. 
+ """ scmd = "add" kwargs["insert_kwargs_after"] = scmd url = Git.polish_url(url) @@ -774,29 +811,39 @@ class Remote(LazyMixin, IterableObj): repo.git.remote(scmd, "--", name, url, **kwargs) return cls(repo, name) - # add is an alias + # `add` is an alias. @classmethod def add(cls, repo: "Repo", name: str, url: str, **kwargs: Any) -> "Remote": return cls.create(repo, name, url, **kwargs) @classmethod def remove(cls, repo: "Repo", name: str) -> str: - """Remove the remote with the given name + """Remove the remote with the given name. - :return: the passed remote name to remove + :return: + The passed remote name to remove """ repo.git.remote("rm", name) if isinstance(name, cls): name._clear_cache() return name - # alias - rm = remove + @classmethod + def rm(cls, repo: "Repo", name: str) -> str: + """Alias of remove. + Remove the remote with the given name. + + :return: + The passed remote name to remove + """ + return cls.remove(repo, name) def rename(self, new_name: str) -> "Remote": - """Rename self to the given new_name + """Rename self to the given `new_name`. - :return: self""" + :return: + self + """ if self.name == new_name: return self @@ -807,14 +854,16 @@ class Remote(LazyMixin, IterableObj): return self def update(self, **kwargs: Any) -> "Remote": - """Fetch all changes for this remote, including new branches which will - be forced in ( in case your local remote branch is not part the new remote branches - ancestry anymore ). + """Fetch all changes for this remote, including new branches which will be + forced in (in case your local remote branch is not part the new remote branch's + ancestry anymore). :param kwargs: - Additional arguments passed to git-remote update + Additional arguments passed to ``git remote update``. 
- :return: self""" + :return: + self + """ scmd = "update" kwargs["insert_kwargs_after"] = scmd self.repo.git.remote(scmd, self.name, **kwargs) @@ -828,15 +877,15 @@ class Remote(LazyMixin, IterableObj): ) -> IterableList["FetchInfo"]: progress = to_progress_instance(progress) - # skip first line as it is some remote info we are not interested in + # Skip first line as it is some remote info we are not interested in. output: IterableList["FetchInfo"] = IterableList("name") - # lines which are no progress are fetch info lines - # this also waits for the command to finish - # Skip some progress lines that don't provide relevant information + # Lines which are no progress are fetch info lines. + # This also waits for the command to finish. + # Skip some progress lines that don't provide relevant information. fetch_info_lines = [] - # Basically we want all fetch info lines which appear to be in regular form, and thus have a - # command character. Everything else we ignore, + # Basically we want all fetch info lines which appear to be in regular form, and + # thus have a command character. Everything else we ignore. cmds = set(FetchInfo._flag_map.keys()) progress_handler = progress.new_message_handler() @@ -852,7 +901,7 @@ class Remote(LazyMixin, IterableObj): stderr_text = progress.error_lines and "\n".join(progress.error_lines) or "" proc.wait(stderr=stderr_text) if stderr_text: - log.warning("Error lines received while fetching: %s", stderr_text) + _logger.warning("Error lines received while fetching: %s", stderr_text) for line in progress.other_lines: line = force_text(line) @@ -861,7 +910,7 @@ class Remote(LazyMixin, IterableObj): fetch_info_lines.append(line) continue - # read head information + # Read head information. 
fetch_head = SymbolicReference(self.repo, "FETCH_HEAD") with open(fetch_head.abspath, "rb") as fp: fetch_head_info = [line.decode(defenc) for line in fp.readlines()] @@ -873,22 +922,22 @@ class Remote(LazyMixin, IterableObj): msg += "length of progress lines %i should be equal to lines in FETCH_HEAD file %i\n" msg += "Will ignore extra progress lines or fetch head lines." msg %= (l_fil, l_fhi) - log.debug(msg) - log.debug(b"info lines: " + str(fetch_info_lines).encode("UTF-8")) - log.debug(b"head info: " + str(fetch_head_info).encode("UTF-8")) + _logger.debug(msg) + _logger.debug(b"info lines: " + str(fetch_info_lines).encode("UTF-8")) + _logger.debug(b"head info: " + str(fetch_head_info).encode("UTF-8")) if l_fil < l_fhi: fetch_head_info = fetch_head_info[:l_fil] else: fetch_info_lines = fetch_info_lines[:l_fhi] - # end truncate correct list - # end sanity check + sanitization + # END truncate correct list + # END sanity check + sanitization for err_line, fetch_line in zip(fetch_info_lines, fetch_head_info): try: output.append(FetchInfo._from_line(self.repo, err_line, fetch_line)) except ValueError as exc: - log.debug("Caught error while parsing line: %s", exc) - log.warning("Git informed while fetching: %s", err_line.strip()) + _logger.debug("Caught error while parsing line: %s", exc) + _logger.warning("Git informed while fetching: %s", err_line.strip()) return output def _get_push_info( @@ -899,10 +948,10 @@ class Remote(LazyMixin, IterableObj): ) -> PushInfoList: progress = to_progress_instance(progress) - # read progress information from stderr - # we hope stdout can hold all the data, it should ... - # read the lines manually as it will use carriage returns between the messages - # to override the previous one. This is why we read the bytes manually + # Read progress information from stderr. + # We hope stdout can hold all the data, it should... + # Read the lines manually as it will use carriage returns between the messages + # to override the previous one. 
This is why we read the bytes manually. progress_handler = progress.new_message_handler() output: PushInfoList = PushInfoList() @@ -925,18 +974,18 @@ class Remote(LazyMixin, IterableObj): try: proc.wait(stderr=stderr_text) except Exception as e: - # This is different than fetch (which fails if there is any std_err - # even if there is an output) + # This is different than fetch (which fails if there is any stderr + # even if there is an output). if not output: raise elif stderr_text: - log.warning("Error lines received while fetching: %s", stderr_text) + _logger.warning("Error lines received while fetching: %s", stderr_text) output.error = e return output def _assert_refspec(self) -> None: - """Turns out we can't deal with remotes if the refspec is missing""" + """Turns out we can't deal with remotes if the refspec is missing.""" config = self.config_reader unset = "placeholder" try: @@ -958,38 +1007,51 @@ class Remote(LazyMixin, IterableObj): allow_unsafe_options: bool = False, **kwargs: Any, ) -> IterableList[FetchInfo]: - """Fetch the latest changes for this remote + """Fetch the latest changes for this remote. :param refspec: A "refspec" is used by fetch and push to describe the mapping between remote ref and local ref. They are combined with a colon in - the format <src>:<dst>, preceded by an optional plus sign, +. - For example: git fetch $URL refs/heads/master:refs/heads/origin means + the format ``<src>:<dst>``, preceded by an optional plus sign, ``+``. + For example: ``git fetch $URL refs/heads/master:refs/heads/origin`` means "grab the master branch head from the $URL and store it as my origin - branch head". And git push $URL refs/heads/master:refs/heads/to-upstream + branch head". And ``git push $URL refs/heads/master:refs/heads/to-upstream`` means "publish my master branch head as to-upstream branch at $URL". - See also git-push(1). + See also :manpage:`git-push(1)`. - Taken from the git manual + Taken from the git manual, :manpage:`gitglossary(7)`. 
+ + Fetch supports multiple refspecs (as the underlying :manpage:`git-fetch(1)` + does) - supplying a list rather than a string for 'refspec' will make use of + this facility. + + :param progress: + See the :meth:`push` method. + + :param verbose: + Boolean for verbose output. - Fetch supports multiple refspecs (as the - underlying git-fetch does) - supplying a list rather than a string - for 'refspec' will make use of this facility. - :param progress: See 'push' method - :param verbose: Boolean for verbose output :param kill_after_timeout: To specify a timeout in seconds for the git command, after which the process - should be killed. It is set to None by default. - :param allow_unsafe_protocols: Allow unsafe protocols to be used, like ext - :param allow_unsafe_options: Allow unsafe options to be used, like --upload-pack - :param kwargs: Additional arguments to be passed to git-fetch + should be killed. It is set to ``None`` by default. + + :param allow_unsafe_protocols: + Allow unsafe protocols to be used, like ``ext``. + + :param allow_unsafe_options: + Allow unsafe options to be used, like ``--upload-pack``. + + :param kwargs: + Additional arguments to be passed to :manpage:`git-fetch(1)`. + :return: - IterableList(FetchInfo, ...) list of FetchInfo instances providing detailed - information about the fetch results + IterableList(FetchInfo, ...) list of :class:`FetchInfo` instances providing + detailed information about the fetch results :note: As fetch does not provide progress information to non-ttys, we cannot make - it available here unfortunately as in the 'push' method.""" + it available here unfortunately as in the :meth:`push` method. + """ if refspec is None: # No argument refspec, then ensure the repo's config has a fetch refspec. 
self._assert_refspec() @@ -1025,16 +1087,30 @@ class Remote(LazyMixin, IterableObj): allow_unsafe_options: bool = False, **kwargs: Any, ) -> IterableList[FetchInfo]: - """Pull changes from the given branch, being the same as a fetch followed - by a merge of branch with your local branch. - - :param refspec: see :meth:`fetch` method - :param progress: see :meth:`push` method - :param kill_after_timeout: see :meth:`fetch` method - :param allow_unsafe_protocols: Allow unsafe protocols to be used, like ext - :param allow_unsafe_options: Allow unsafe options to be used, like --upload-pack - :param kwargs: Additional arguments to be passed to git-pull - :return: Please see :meth:`fetch` method""" + """Pull changes from the given branch, being the same as a fetch followed by a + merge of branch with your local branch. + + :param refspec: + See :meth:`fetch` method. + + :param progress: + See :meth:`push` method. + + :param kill_after_timeout: + See :meth:`fetch` method. + + :param allow_unsafe_protocols: + Allow unsafe protocols to be used, like ``ext``. + + :param allow_unsafe_options: + Allow unsafe options to be used, like ``--upload-pack``. + + :param kwargs: + Additional arguments to be passed to :manpage:`git-pull(1)`. + + :return: + Please see :meth:`fetch` method. + """ if refspec is None: # No argument refspec, then ensure the repo's config has a fetch refspec. self._assert_refspec() @@ -1067,33 +1143,50 @@ class Remote(LazyMixin, IterableObj): ) -> PushInfoList: """Push changes from source branch in refspec to target branch in refspec. - :param refspec: see 'fetch' method + :param refspec: + See :meth:`fetch` method. + :param progress: Can take one of many value types: - * None to discard progress information + * ``None``, to discard progress information. * A function (callable) that is called with the progress information. Signature: ``progress(op_code, cur_count, max_count=None, message='')``. 
- `Click here <http://goo.gl/NPa7st>`__ for a description of all arguments - given to the function. - * An instance of a class derived from ``git.RemoteProgress`` that - overrides the ``update()`` function. + See :meth:`RemoteProgress.update <git.util.RemoteProgress.update>` for a + description of all arguments given to the function. + * An instance of a class derived from :class:`~git.util.RemoteProgress` that + overrides the + :meth:`RemoteProgress.update <git.util.RemoteProgress.update>` method. + + :note: + No further progress information is returned after push returns. - :note: No further progress information is returned after push returns. :param kill_after_timeout: To specify a timeout in seconds for the git command, after which the process - should be killed. It is set to None by default. - :param allow_unsafe_protocols: Allow unsafe protocols to be used, like ext - :param allow_unsafe_options: Allow unsafe options to be used, like --receive-pack - :param kwargs: Additional arguments to be passed to git-push + should be killed. It is set to ``None`` by default. + + :param allow_unsafe_protocols: + Allow unsafe protocols to be used, like ``ext``. + + :param allow_unsafe_options: + Allow unsafe options to be used, like ``--receive-pack``. + + :param kwargs: + Additional arguments to be passed to :manpage:`git-push(1)`. + :return: - A ``PushInfoList`` object, where each list member - represents an individual head which had been updated on the remote side. - If the push contains rejected heads, these will have the PushInfo.ERROR bit set - in their flags. - If the operation fails completely, the length of the returned PushInfoList will - be 0. - Call ``.raise_if_error()`` on the returned object to raise on any failure.""" + A :class:`PushInfoList` object, where each list member represents an + individual head which had been updated on the remote side. + + If the push contains rejected heads, these will have the + :const:`PushInfo.ERROR` bit set in their flags. 
+ + If the operation fails completely, the length of the returned + :class:`PushInfoList` will be 0. + + Call :meth:`~PushInfoList.raise_if_error` on the returned object to raise on + any failure. + """ kwargs = add_progress(kwargs, self.repo.git, progress) refspec = Git._unpack_args(refspec or []) @@ -1120,8 +1213,10 @@ class Remote(LazyMixin, IterableObj): def config_reader(self) -> SectionConstraint[GitConfigParser]: """ :return: - GitConfigParser compatible object able to read options for only our remote. - Hence you may simple type config.get("pushurl") to obtain the information""" + :class:`~git.config.GitConfigParser` compatible object able to read options + for only our remote. Hence you may simply type ``config.get("pushurl")`` to + obtain the information. + """ return self._config_reader def _clear_cache(self) -> None: @@ -1134,16 +1229,20 @@ class Remote(LazyMixin, IterableObj): @property def config_writer(self) -> SectionConstraint: """ - :return: GitConfigParser compatible object able to write options for this remote. + :return: + :class:`~git.config.GitConfigParser`-compatible object able to write options + for this remote. + :note: You can only own one writer at a time - delete it to release the configuration file and make it usable by others. To assure consistent results, you should only query options through the writer. Once you are done writing, you are free to use the config reader - once again.""" + once again. + """ writer = self.repo.config_writer() - # clear our cache to assure we re-read the possibly changed configuration + # Clear our cache to ensure we re-read the possibly changed configuration. 
self._clear_cache() return SectionConstraint(writer, self._config_section_name()) diff --git a/git/repo/__init__.py b/git/repo/__init__.py index 23c18db..66319ef 100644 --- a/git/repo/__init__.py +++ b/git/repo/__init__.py @@ -1,3 +1,8 @@ -"""Initialize the Repo package""" -# flake8: noqa -from .base import Repo as Repo +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ + +"""Initialize the repo package.""" + +__all__ = ["Repo"] + +from .base import Repo diff --git a/git/repo/base.py b/git/repo/base.py index bc1b887..db89cdf 100644 --- a/git/repo/base.py +++ b/git/repo/base.py @@ -1,27 +1,28 @@ -# repo.py # Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors # -# This module is part of GitPython and is released under -# the BSD License: https://opensource.org/license/bsd-3-clause/ +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ + from __future__ import annotations + +__all__ = ["Repo"] + +import gc import logging import os +import os.path as osp +from pathlib import Path import re import shlex +import sys import warnings -from pathlib import Path - +import gitdb from gitdb.db.loose import LooseObjectDB - from gitdb.exc import BadObject from git.cmd import Git, handle_process_output -from git.compat import ( - defenc, - safe_decode, - is_win, -) +from git.compat import defenc, safe_decode from git.config import GitConfigParser from git.db import GitCmdObjectDB from git.exc import ( @@ -35,32 +36,29 @@ from git.refs import HEAD, Head, Reference, TagReference from git.remote import Remote, add_progress, to_progress_instance from git.util import ( Actor, - finalize_process, cygpath, - hex_to_bin, expand_path, + finalize_process, + hex_to_bin, remove_password_if_present, ) -import os.path as osp from .fun import ( - rev_parse, - is_git_dir, find_submodule_git_dir, - touch, 
find_worktree_git_dir, + is_git_dir, + rev_parse, + touch, ) -import gc -import gitdb # typing ------------------------------------------------------ from git.types import ( - TBD, - PathLike, - Lit_config_levels, - Commit_ish, CallableProgress, + Commit_ish, + Lit_config_levels, + PathLike, + TBD, Tree_ish, assert_never, ) @@ -72,90 +70,104 @@ from typing import ( Iterator, List, Mapping, + NamedTuple, Optional, Sequence, + TYPE_CHECKING, TextIO, Tuple, Type, Union, - NamedTuple, cast, - TYPE_CHECKING, ) from git.types import ConfigLevels_Tup, TypedDict if TYPE_CHECKING: - from git.util import IterableList - from git.refs.symbolic import SymbolicReference from git.objects import Tree from git.objects.submodule.base import UpdateProgress + from git.refs.symbolic import SymbolicReference from git.remote import RemoteProgress + from git.util import IterableList # ----------------------------------------------------------- -log = logging.getLogger(__name__) - -__all__ = ("Repo",) +_logger = logging.getLogger(__name__) class BlameEntry(NamedTuple): - commit: Dict[str, "Commit"] + commit: Dict[str, Commit] linenos: range orig_path: Optional[str] orig_linenos: range -class Repo(object): - """Represents a git repository and allows you to query references, - gather commit information, generate diffs, create and clone repositories query - the log. +class Repo: + """Represents a git repository and allows you to query references, create commit + information, generate diffs, create and clone repositories, and query the log. The following attributes are worth using: - 'working_dir' is the working directory of the git command, which is the working tree - directory if available or the .git directory in case of bare repositories + * :attr:`working_dir` is the working directory of the git command, which is the + working tree directory if available or the ``.git`` directory in case of bare + repositories. 
- 'working_tree_dir' is the working tree directory, but will return None - if we are a bare repository. + * :attr:`working_tree_dir` is the working tree directory, but will return ``None`` + if we are a bare repository. - 'git_dir' is the .git repository directory, which is always set.""" + * :attr:`git_dir` is the ``.git`` repository directory, which is always set. + """ DAEMON_EXPORT_FILE = "git-daemon-export-ok" - git = cast("Git", None) # Must exist, or __del__ will fail in case we raise on `__init__()` + # Must exist, or __del__ will fail in case we raise on `__init__()`. + git = cast("Git", None) + working_dir: PathLike + """The working directory of the git command.""" + _working_tree_dir: Optional[PathLike] = None + git_dir: PathLike + """The ``.git`` repository directory.""" + _common_dir: PathLike = "" - # precompiled regex + # Precompiled regex re_whitespace = re.compile(r"\s+") - re_hexsha_only = re.compile("^[0-9A-Fa-f]{40}$") - re_hexsha_shortened = re.compile("^[0-9A-Fa-f]{4,40}$") + re_hexsha_only = re.compile(r"^[0-9A-Fa-f]{40}$") + re_hexsha_shortened = re.compile(r"^[0-9A-Fa-f]{4,40}$") re_envvars = re.compile(r"(\$(\{\s?)?[a-zA-Z_]\w*(\}\s?)?|%\s?[a-zA-Z_]\w*\s?%)") re_author_committer_start = re.compile(r"^(author|committer)") re_tab_full_line = re.compile(r"^\t(.*)$") unsafe_git_clone_options = [ - # This option allows users to execute arbitrary commands. - # https://git-scm.com/docs/git-clone#Documentation/git-clone.txt---upload-packltupload-packgt + # Executes arbitrary commands: "--upload-pack", "-u", - # Users can override configuration variables - # like `protocol.allow` or `core.gitProxy` to execute arbitrary commands. - # https://git-scm.com/docs/git-clone#Documentation/git-clone.txt---configltkeygtltvaluegt + # Can override configuration variables that execute arbitrary commands: "--config", "-c", ] + """Options to :manpage:`git-clone(1)` that allow arbitrary commands to be executed. 
+ + The ``--upload-pack``/``-u`` option allows users to execute arbitrary commands + directly: + https://git-scm.com/docs/git-clone#Documentation/git-clone.txt---upload-packltupload-packgt - # invariants - # represents the configuration level of a configuration file + The ``--config``/``-c`` option allows users to override configuration variables like + ``protocol.allow`` and ``core.gitProxy`` to execute arbitrary commands: + https://git-scm.com/docs/git-clone#Documentation/git-clone.txt---configltkeygtltvaluegt + """ + + # Invariants config_level: ConfigLevels_Tup = ("system", "user", "global", "repository") + """Represents the configuration level of a configuration file.""" # Subclass configuration - # Subclasses may easily bring in their own custom types by placing a constructor or type here GitCommandWrapperType = Git + """Subclasses may easily bring in their own custom types by placing a constructor or + type here.""" def __init__( self, @@ -164,41 +176,50 @@ class Repo(object): search_parent_directories: bool = False, expand_vars: bool = True, ) -> None: - """Create a new Repo instance + R"""Create a new :class:`Repo` instance. :param path: - the path to either the root git directory or the bare git repo:: + The path to either the worktree directory or the .git directory itself:: repo = Repo("/Users/mtrier/Development/git-python") repo = Repo("/Users/mtrier/Development/git-python.git") repo = Repo("~/Development/git-python.git") repo = Repo("$REPOSITORIES/Development/git-python.git") - repo = Repo("C:\\Users\\mtrier\\Development\\git-python\\.git") + repo = Repo(R"C:\Users\mtrier\Development\git-python\.git") + + - In *Cygwin*, `path` may be a ``cygdrive/...`` prefixed path. + - If `path` is ``None`` or an empty string, :envvar:`GIT_DIR` is used. If + that environment variable is absent or empty, the current directory is + used. - - In *Cygwin*, path may be a `'cygdrive/...'` prefixed path. 
- - If it evaluates to false, :envvar:`GIT_DIR` is used, and if this also evals to false, - the current-directory is used. :param odbt: - Object DataBase type - a type which is constructed by providing - the directory containing the database objects, i.e. .git/objects. It will - be used to access all object data + Object DataBase type - a type which is constructed by providing the + directory containing the database objects, i.e. ``.git/objects``. It will be + used to access all object data. + :param search_parent_directories: - if True, all parent directories will be searched for a valid repo as well. + If ``True``, all parent directories will be searched for a valid repo as + well. - Please note that this was the default behaviour in older versions of GitPython, - which is considered a bug though. - :raise InvalidGitRepositoryError: - :raise NoSuchPathError: - :return: git.Repo""" + Please note that this was the default behaviour in older versions of + GitPython, which is considered a bug though. + + :raise git.exc.InvalidGitRepositoryError: + + :raise git.exc.NoSuchPathError: + + :return: + :class:`Repo` + """ epath = path or os.getenv("GIT_DIR") if not epath: epath = os.getcwd() if Git.is_cygwin(): - # Given how the tests are written, this seems more likely to catch - # Cygwin git used from Windows than Windows git used from Cygwin. - # Therefore changing to Cygwin-style paths is the relevant operation. - epath = cygpath(epath) + # Given how the tests are written, this seems more likely to catch Cygwin + # git used from Windows than Windows git used from Cygwin. Therefore + # changing to Cygwin-style paths is the relevant operation. + epath = cygpath(str(epath)) epath = epath or path or os.getcwd() if not isinstance(epath, str): @@ -214,25 +235,26 @@ class Repo(object): if not os.path.exists(epath): raise NoSuchPathError(epath) - ## Walk up the path to find the `.git` dir. - # + # Walk up the path to find the `.git` dir. 
curpath = epath git_dir = None while curpath: # ABOUT osp.NORMPATH - # It's important to normalize the paths, as submodules will otherwise initialize their - # repo instances with paths that depend on path-portions that will not exist after being - # removed. It's just cleaner. + # It's important to normalize the paths, as submodules will otherwise + # initialize their repo instances with paths that depend on path-portions + # that will not exist after being removed. It's just cleaner. if is_git_dir(curpath): git_dir = curpath # from man git-config : core.worktree - # Set the path to the root of the working tree. If GIT_COMMON_DIR environment - # variable is set, core.worktree is ignored and not used for determining the - # root of working tree. This can be overridden by the GIT_WORK_TREE environment - # variable. The value can be an absolute path or relative to the path to the .git - # directory, which is either specified by GIT_DIR, or automatically discovered. - # If GIT_DIR is specified but none of GIT_WORK_TREE and core.worktree is specified, - # the current working directory is regarded as the top level of your working tree. + # Set the path to the root of the working tree. If GIT_COMMON_DIR + # environment variable is set, core.worktree is ignored and not used for + # determining the root of working tree. This can be overridden by the + # GIT_WORK_TREE environment variable. The value can be an absolute path + # or relative to the path to the .git directory, which is either + # specified by GIT_DIR, or automatically discovered. If GIT_DIR is + # specified but none of GIT_WORK_TREE and core.worktree is specified, + # the current working directory is regarded as the top level of your + # working tree. 
self._working_tree_dir = os.path.dirname(git_dir) if os.environ.get("GIT_COMMON_DIR") is None: gitconf = self._config_reader("repository", git_dir) @@ -271,7 +293,7 @@ class Repo(object): try: self._bare = self.config_reader("repository").getboolean("core", "bare") except Exception: - # lets not assume the option exists, although it should + # Let's not assume the option exists, although it should. pass try: @@ -280,8 +302,8 @@ class Repo(object): except OSError: self._common_dir = "" - # adjust the wd in case we are actually bare - we didn't know that - # in the first place + # Adjust the working directory in case we are actually bare - we didn't know + # that in the first place. if self._bare: self._working_tree_dir = None # END working dir handling @@ -289,7 +311,7 @@ class Repo(object): self.working_dir: PathLike = self._working_tree_dir or self.common_dir self.git = self.GitCommandWrapperType(self.working_dir) - # special handling, in special times + # Special handling, in special times. rootpath = osp.join(self.common_dir, "objects") if issubclass(odbt, GitCmdObjectDB): self.odb = odbt(rootpath, self.git) @@ -311,16 +333,14 @@ class Repo(object): def close(self) -> None: if self.git: self.git.clear_cache() - # Tempfiles objects on Windows are holding references to - # open files until they are collected by the garbage - # collector, thus preventing deletion. - # TODO: Find these references and ensure they are closed - # and deleted synchronously rather than forcing a gc - # collection. - if is_win: + # Tempfiles objects on Windows are holding references to open files until + # they are collected by the garbage collector, thus preventing deletion. + # TODO: Find these references and ensure they are closed and deleted + # synchronously rather than forcing a gc collection. 
+ if sys.platform == "win32": gc.collect() gitdb.util.mman.collect() - if is_win: + if sys.platform == "win32": gc.collect() def __eq__(self, rhs: object) -> bool: @@ -351,64 +371,106 @@ class Repo(object): @property def working_tree_dir(self) -> Optional[PathLike]: - """:return: The working tree directory of our git repository. If this is a bare repository, None is returned.""" + """ + :return: + The working tree directory of our git repository. + If this is a bare repository, ``None`` is returned. + """ return self._working_tree_dir @property def common_dir(self) -> PathLike: """ - :return: The git dir that holds everything except possibly HEAD, - FETCH_HEAD, ORIG_HEAD, COMMIT_EDITMSG, index, and logs/.""" + :return: + The git dir that holds everything except possibly HEAD, FETCH_HEAD, + ORIG_HEAD, COMMIT_EDITMSG, index, and logs/. + """ return self._common_dir or self.git_dir @property def bare(self) -> bool: - """:return: True if the repository is bare""" + """:return: ``True`` if the repository is bare""" return self._bare @property def heads(self) -> "IterableList[Head]": - """A list of ``Head`` objects representing the branch heads in - this repo + """A list of :class:`~git.refs.head.Head` objects representing the branch heads + in this repo. - :return: ``git.IterableList(Head, ...)``""" + :return: + ``git.IterableList(Head, ...)`` + """ return Head.list_items(self) + @property + def branches(self) -> "IterableList[Head]": + """Alias for heads. + A list of :class:`~git.refs.head.Head` objects representing the branch heads + in this repo. + + :return: + ``git.IterableList(Head, ...)`` + """ + return self.heads + @property def references(self) -> "IterableList[Reference]": - """A list of Reference objects representing tags, heads and remote references. + """A list of :class:`~git.refs.reference.Reference` objects representing tags, + heads and remote references. 
- :return: IterableList(Reference, ...)""" + :return: + ``git.IterableList(Reference, ...)`` + """ return Reference.list_items(self) - # alias for references - refs = references + @property + def refs(self) -> "IterableList[Reference]": + """Alias for references. + A list of :class:`~git.refs.reference.Reference` objects representing tags, + heads and remote references. - # alias for heads - branches = heads + :return: + ``git.IterableList(Reference, ...)`` + """ + return self.references @property def index(self) -> "IndexFile": - """:return: IndexFile representing this repository's index. - :note: This property can be expensive, as the returned ``IndexFile`` will be - reinitialized. It's recommended to re-use the object.""" + """ + :return: + A :class:`~git.index.base.IndexFile` representing this repository's index. + + :note: + This property can be expensive, as the returned + :class:`~git.index.base.IndexFile` will be reinitialized. + It is recommended to reuse the object. + """ return IndexFile(self) @property def head(self) -> "HEAD": - """:return: HEAD Object pointing to the current head reference""" + """ + :return: + :class:`~git.refs.head.HEAD` object pointing to the current head reference + """ return HEAD(self, "HEAD") @property def remotes(self) -> "IterableList[Remote]": - """A list of Remote objects allowing to access and manipulate remotes + """A list of :class:`~git.remote.Remote` objects allowing to access and + manipulate remotes. - :return: ``git.IterableList(Remote, ...)``""" + :return: + ``git.IterableList(Remote, ...)`` + """ return Remote.list_items(self) def remote(self, name: str = "origin") -> "Remote": - """:return: Remote with the specified name - :raise ValueError: if no remote with such a name exists""" + """:return: The remote with the specified name + + :raise ValueError: + If no remote with such a name exists. 
+ """ r = Remote(self, name) if not r.exists(): raise ValueError("Remote named '%s' didn't exist" % name) @@ -419,13 +481,18 @@ class Repo(object): @property def submodules(self) -> "IterableList[Submodule]": """ - :return: git.IterableList(Submodule, ...) of direct submodules - available from the current head""" + :return: + git.IterableList(Submodule, ...) of direct submodules available from the + current head + """ return Submodule.list_items(self) def submodule(self, name: str) -> "Submodule": - """:return: Submodule with the given name - :raise ValueError: If no such submodule exists""" + """:return: The submodule with the given name + + :raise ValueError: + If no such submodule exists. + """ try: return self.submodules[name] except IndexError as e: @@ -433,38 +500,59 @@ class Repo(object): # END exception handling def create_submodule(self, *args: Any, **kwargs: Any) -> Submodule: - """Create a new submodule + """Create a new submodule. - :note: See the documentation of Submodule.add for a description of the - applicable parameters - :return: created submodules""" + :note: + For a description of the applicable parameters, see the documentation of + :meth:`Submodule.add <git.objects.submodule.base.Submodule.add>`. + + :return: + The created submodule. + """ return Submodule.add(self, *args, **kwargs) def iter_submodules(self, *args: Any, **kwargs: Any) -> Iterator[Submodule]: - """An iterator yielding Submodule instances, see Traversable interface - for a description of args and kwargs + """An iterator yielding Submodule instances. + + See the `~git.objects.util.Traversable` interface for a description of `args` + and `kwargs`. - :return: Iterator""" + :return: + Iterator + """ return RootModule(self).traverse(*args, **kwargs) def submodule_update(self, *args: Any, **kwargs: Any) -> Iterator[Submodule]: """Update the submodules, keeping the repository consistent as it will - take the previous state into consideration. 
For more information, please - see the documentation of RootModule.update""" + take the previous state into consideration. + + :note: + For more information, please see the documentation of + :meth:`RootModule.update <git.objects.submodule.root.RootModule.update>`. + """ return RootModule(self).update(*args, **kwargs) # }END submodules @property def tags(self) -> "IterableList[TagReference]": - """A list of ``Tag`` objects that are available in this repo + """A list of :class:`~git.refs.tag.TagReference` objects that are available in + this repo. - :return: ``git.IterableList(TagReference, ...)``""" + :return: + ``git.IterableList(TagReference, ...)`` + """ return TagReference.list_items(self) def tag(self, path: PathLike) -> TagReference: - """:return: TagReference Object, reference pointing to a Commit or Tag - :param path: path to the tag reference, i.e. 0.1.5 or tags/0.1.5""" + """ + :return: + :class:`~git.refs.tag.TagReference` object, reference pointing to a + :class:`~git.objects.commit.Commit` or tag + + :param path: + Path to the tag reference, e.g. ``0.1.5`` or ``tags/0.1.5``. + """ full_path = self._to_full_tag_path(path) return TagReference(self, full_path) @@ -486,15 +574,22 @@ class Repo(object): logmsg: Optional[str] = None, ) -> "Head": """Create a new head within the repository. - For more documentation, please see the Head.create method. - :return: newly created Head Reference""" + :note: + For more documentation, please see the + :meth:`Head.create <git.refs.head.Head.create>` method. + + :return: + Newly created :class:`~git.refs.head.Head` Reference. + """ return Head.create(self, path, commit, logmsg, force) def delete_head(self, *heads: "Union[str, Head]", **kwargs: Any) -> None: - """Delete the given heads + """Delete the given heads. - :param kwargs: Additional keyword arguments to be passed to git-branch""" + :param kwargs: + Additional keyword arguments to be passed to :manpage:`git-branch(1)`. 
+ """ return Head.delete(self, *heads, **kwargs) def create_tag( @@ -506,22 +601,29 @@ class Repo(object): **kwargs: Any, ) -> TagReference: """Create a new tag reference. - For more documentation, please see the TagReference.create method. - :return: TagReference object""" + :note: + For more documentation, please see the + :meth:`TagReference.create <git.refs.tag.TagReference.create>` method. + + :return: + :class:`~git.refs.tag.TagReference` object + """ return TagReference.create(self, path, ref, message, force, **kwargs) def delete_tag(self, *tags: TagReference) -> None: - """Delete the given tag references""" + """Delete the given tag references.""" return TagReference.delete(self, *tags) def create_remote(self, name: str, url: str, **kwargs: Any) -> Remote: """Create a new remote. - For more information, please see the documentation of the Remote.create - methods + For more information, please see the documentation of the + :meth:`Remote.create <git.remote.Remote.create>` method. - :return: Remote reference""" + :return: + :class:`~git.remote.Remote` reference + """ return Remote.create(self, name, url, **kwargs) def delete_remote(self, remote: "Remote") -> str: @@ -531,9 +633,9 @@ class Repo(object): def _get_config_path(self, config_level: Lit_config_levels, git_dir: Optional[PathLike] = None) -> str: if git_dir is None: git_dir = self.git_dir - # we do not support an absolute path of the gitconfig on windows , - # use the global config instead - if is_win and config_level == "system": + # We do not support an absolute path of the gitconfig on Windows. + # Use the global config instead. 
+ if sys.platform == "win32" and config_level == "system": config_level = "global" if config_level == "system": @@ -550,7 +652,7 @@ class Repo(object): else: return osp.normpath(osp.join(repo_dir, "config")) else: - assert_never( # type:ignore[unreachable] + assert_never( # type: ignore[unreachable] config_level, ValueError(f"Invalid configuration level: {config_level!r}"), ) @@ -561,17 +663,21 @@ class Repo(object): ) -> GitConfigParser: """ :return: - GitConfigParser allowing to read the full git configuration, but not to write it + :class:`~git.config.GitConfigParser` allowing to read the full git + configuration, but not to write it. The configuration will include values from the system, user and repository configuration files. :param config_level: - For possible values, see config_writer method - If None, all applicable levels will be used. Specify a level in case - you know which file you wish to read to prevent reading multiple files. - :note: On windows, system configuration cannot currently be read as the path is - unknown, instead the global path will be used.""" + For possible values, see the :meth:`config_writer` method. If ``None``, all + applicable levels will be used. Specify a level in case you know which file + you wish to read to prevent reading multiple files. + + :note: + On Windows, system configuration cannot currently be read as the path is + unknown, instead the global path will be used. + """ return self._config_reader(config_level=config_level) def _config_reader( @@ -592,46 +698,59 @@ class Repo(object): def config_writer(self, config_level: Lit_config_levels = "repository") -> GitConfigParser: """ :return: - GitConfigParser allowing to write values of the specified configuration file level. - Config writers should be retrieved, used to change the configuration, and written - right away as they will lock the configuration file in question and prevent other's - to write it. 
+ A :class:`~git.config.GitConfigParser` allowing to write values of the + specified configuration file level. Config writers should be retrieved, used + to change the configuration, and written right away as they will lock the + configuration file in question and prevent others from writing it. :param config_level: - One of the following values - system = system wide configuration file - global = user level configuration file - repository = configuration file for this repository only""" + One of the following values: + + * ``"system"`` = system wide configuration file + * ``"global"`` = user level configuration file + * ``"repository"`` = configuration file for this repository only + """ return GitConfigParser(self._get_config_path(config_level), read_only=False, repo=self, merge_includes=False) def commit(self, rev: Union[str, Commit_ish, None] = None) -> Commit: - """The Commit object for the specified revision + """The :class:`~git.objects.commit.Commit` object for the specified revision. - :param rev: revision specifier, see git-rev-parse for viable options. - :return: ``git.Commit`` + :param rev: + Revision specifier, see :manpage:`git-rev-parse(1)` for viable options. + + :return: + :class:`~git.objects.commit.Commit` """ if rev is None: return self.head.commit return self.rev_parse(str(rev) + "^0") def iter_trees(self, *args: Any, **kwargs: Any) -> Iterator["Tree"]: - """:return: Iterator yielding Tree objects - :note: Takes all arguments known to iter_commits method""" + """:return: Iterator yielding :class:`~git.objects.tree.Tree` objects + + :note: + Accepts all arguments known to the :meth:`iter_commits` method. + """ return (c.tree for c in self.iter_commits(*args, **kwargs)) def tree(self, rev: Union[Tree_ish, str, None] = None) -> "Tree": - """The Tree object for the given treeish revision + """The :class:`~git.objects.tree.Tree` object for the given tree-ish revision.
+ Examples:: repo.tree(repo.heads[0]) - :param rev: is a revision pointing to a Treeish ( being a commit or tree ) - :return: ``git.Tree`` + :param rev: + A revision pointing to a Treeish (being a commit or tree). + + :return: + :class:`~git.objects.tree.Tree` :note: - If you need a non-root level tree, find it by iterating the root tree. Otherwise - it cannot know about its path relative to the repository root and subsequent - operations might have unexpected results.""" + If you need a non-root level tree, find it by iterating the root tree. + Otherwise it cannot know about its path relative to the repository root and + subsequent operations might have unexpected results. + """ if rev is None: return self.head.commit.tree return self.rev_parse(str(rev) + "^{tree}") @@ -642,66 +761,87 @@ class Repo(object): paths: Union[PathLike, Sequence[PathLike]] = "", **kwargs: Any, ) -> Iterator[Commit]: - """A list of Commit objects representing the history of a given ref/commit + """An iterator of :class:`~git.objects.commit.Commit` objects representing the + history of a given ref/commit. :param rev: - revision specifier, see git-rev-parse for viable options. - If None, the active branch will be used. + Revision specifier, see :manpage:`git-rev-parse(1)` for viable options. + If ``None``, the active branch will be used. :param paths: - is an optional path or a list of paths; if set only commits that include the path - or paths will be returned + An optional path or a list of paths. If set, only commits that include the + path or paths will be returned. :param kwargs: - Arguments to be passed to git-rev-list - common ones are - max_count and skip + Arguments to be passed to :manpage:`git-rev-list(1)`. + Common ones are ``max_count`` and ``skip``. - :note: to receive only commits between two named revisions, use the - "revA...revB" revision specifier + :note: + To receive only commits between two named revisions, use the + ``"revA...revB"`` revision specifier. 
- :return: ``git.Commit[]``""" + :return: + Iterator of :class:`~git.objects.commit.Commit` objects + """ if rev is None: rev = self.head.commit return Commit.iter_items(self, rev, paths, **kwargs) - def merge_base(self, *rev: TBD, **kwargs: Any) -> List[Union[Commit_ish, None]]: - """Find the closest common ancestor for the given revision (e.g. Commits, Tags, References, etc) + def merge_base(self, *rev: TBD, **kwargs: Any) -> List[Commit]: + R"""Find the closest common ancestor for the given revision + (:class:`~git.objects.commit.Commit`\s, :class:`~git.refs.tag.Tag`\s, + :class:`~git.refs.reference.Reference`\s, etc.). + + :param rev: + At least two revs to find the common ancestor for. + + :param kwargs: + Additional arguments to be passed to the ``repo.git.merge_base()`` command + which does all the work. + + :return: + A list of :class:`~git.objects.commit.Commit` objects. If ``--all`` was + not passed as a keyword argument, the list will have at max one + :class:`~git.objects.commit.Commit`, or is empty if no common merge base + exists. - :param rev: At least two revs to find the common ancestor for. - :param kwargs: Additional arguments to be passed to the repo.git.merge_base() command which does all the work. - :return: A list of Commit objects. If --all was not specified as kwarg, the list will have at max one Commit, - or is empty if no common merge base exists. - :raises ValueError: If not at least two revs are provided + :raise ValueError: + If fewer than two revisions are provided. 
""" if len(rev) < 2: raise ValueError("Please specify at least two revs, got only %i" % len(rev)) - # end handle input + # END handle input - res: List[Union[Commit_ish, None]] = [] + res: List[Commit] = [] try: - lines = self.git.merge_base(*rev, **kwargs).splitlines() # List[str] + lines: List[str] = self.git.merge_base(*rev, **kwargs).splitlines() except GitCommandError as err: if err.status == 128: raise - # end handle invalid rev - # Status code 1 is returned if there is no merge-base - # (see https://github.com/git/git/blob/master/builtin/merge-base.c#L16) + # END handle invalid rev + # Status code 1 is returned if there is no merge-base. + # (See: https://github.com/git/git/blob/v2.44.0/builtin/merge-base.c#L19) return res - # end exception handling + # END exception handling for line in lines: res.append(self.commit(line)) - # end for each merge-base + # END for each merge-base return res - def is_ancestor(self, ancestor_rev: "Commit", rev: "Commit") -> bool: - """Check if a commit is an ancestor of another + def is_ancestor(self, ancestor_rev: Commit, rev: Commit) -> bool: + """Check if a commit is an ancestor of another. + + :param ancestor_rev: + Rev which should be an ancestor. + + :param rev: + Rev to test against `ancestor_rev`. - :param ancestor_rev: Rev which should be an ancestor - :param rev: Rev to test against ancestor_rev - :return: ``True``, ancestor_rev is an ancestor to rev. + :return: + ``True`` if `ancestor_rev` is an ancestor to `rev`. """ try: self.git.merge_base(ancestor_rev, rev, is_ancestor=True) @@ -719,7 +859,7 @@ class Repo(object): if object_info.type == object_type.encode(): return True else: - log.debug( + _logger.debug( "Commit hash points to an object of type '%s'. 
Requested were objects of type '%s'", object_info.type.decode(), object_type, @@ -728,7 +868,7 @@ class Repo(object): else: return True except BadObject: - log.debug("Commit hash is invalid.") + _logger.debug("Commit hash is invalid.") return False def _get_daemon_export(self) -> bool: @@ -754,9 +894,11 @@ class Repo(object): del _set_daemon_export def _get_alternates(self) -> List[str]: - """The list of alternates for this repo from which objects can be retrieved + """The list of alternates for this repo from which objects can be retrieved. - :return: list of strings being pathnames of alternates""" + :return: + List of strings being pathnames of alternates + """ if self.git_dir: alternates_path = osp.join(self.git_dir, "objects", "info", "alternates") @@ -767,16 +909,18 @@ class Repo(object): return [] def _set_alternates(self, alts: List[str]) -> None: - """Sets the alternates + """Set the alternates. :param alts: - is the array of string paths representing the alternates at which - git should look for objects, i.e. /home/user/repo/.git/objects + The array of string paths representing the alternates at which git should + look for objects, i.e. ``/home/user/repo/.git/objects``. + + :raise git.exc.NoSuchPathError: - :raise NoSuchPathError: :note: - The method does not check for the existence of the paths in alts - as the caller is responsible.""" + The method does not check for the existence of the paths in `alts`, as the + caller is responsible. + """ alternates_path = osp.join(self.common_dir, "objects", "info", "alternates") if not alts: if osp.isfile(alternates_path): @@ -801,27 +945,28 @@ class Repo(object): ) -> bool: """ :return: - ``True``, the repository is considered dirty. By default it will react - like a git-status without untracked files, hence it is dirty if the - index or the working copy have changes.""" + ``True`` if the repository is considered dirty. 
By default it will react + like a :manpage:`git-status(1)` without untracked files, hence it is dirty + if the index or the working copy have changes. + """ if self._bare: # Bare repositories with no associated working directory are # always considered to be clean. return False - # start from the one which is fastest to evaluate + # Start from the one which is fastest to evaluate. default_args = ["--abbrev=40", "--full-index", "--raw"] if not submodules: default_args.append("--ignore-submodules") if path: default_args.extend(["--", str(path)]) if index: - # diff index against HEAD + # diff index against HEAD. if osp.isfile(self.index.path) and len(self.git.diff("--cached", *default_args)): return True # END index handling if working_tree: - # diff index against working tree + # diff index against working tree. if len(self.git.diff(*default_args)): return True # END working tree handling @@ -837,18 +982,20 @@ class Repo(object): :return: list(str,...) - Files currently untracked as they have not been staged yet. Paths - are relative to the current working directory of the git command. + Files currently untracked as they have not been staged yet. Paths are + relative to the current working directory of the git command. :note: - ignored files will not appear here, i.e. files mentioned in .gitignore + Ignored files will not appear here, i.e. files mentioned in ``.gitignore``. + :note: - This property is expensive, as no cache is involved. To process the result, please - consider caching it yourself.""" + This property is expensive, as no cache is involved. To process the result, + please consider caching it yourself. + """ return self._get_untracked_files() def _get_untracked_files(self, *args: Any, **kwargs: Any) -> List[str]: - # make sure we get all files, not only untracked directories + # Make sure we get all files, not only untracked directories. 
proc = self.git.status(*args, porcelain=True, untracked_files=True, as_process=True, **kwargs) # Untracked files prefix in porcelain mode prefix = "?? " @@ -868,21 +1015,25 @@ class Repo(object): return untracked_files def ignored(self, *paths: PathLike) -> List[str]: - """Checks if paths are ignored via .gitignore - Doing so using the "git check-ignore" method. + """Checks if paths are ignored via ``.gitignore``. - :param paths: List of paths to check whether they are ignored or not - :return: subset of those paths which are ignored + This does so using the :manpage:`git-check-ignore(1)` method. + + :param paths: + List of paths to check whether they are ignored or not. + + :return: + Subset of those paths which are ignored """ try: proc: str = self.git.check_ignore(*paths) except GitCommandError as err: - # If return code is 1, this means none of the items in *paths - # are ignored by Git, so return an empty list. Raise the - # exception on all other return codes. if err.status == 1: + # If return code is 1, this means none of the items in *paths are + # ignored by Git, so return an empty list. return [] else: + # Raise the exception on all other return codes. raise return proc.replace("\\\\", "\\").replace('"', "").split("\n") @@ -891,24 +1042,33 @@ class Repo(object): def active_branch(self) -> Head: """The name of the currently active branch. - :raises TypeError: If HEAD is detached - :return: Head to the active branch""" + :raise TypeError: + If HEAD is detached. + + :return: + :class:`~git.refs.head.Head` to the active branch + """ # reveal_type(self.head.reference) # => Reference return self.head.reference - def blame_incremental(self, rev: str | HEAD, file: str, **kwargs: Any) -> Iterator["BlameEntry"]: + def blame_incremental(self, rev: str | HEAD | None, file: str, **kwargs: Any) -> Iterator["BlameEntry"]: """Iterator for blame information for the given file at the given revision. 
- Unlike .blame(), this does not return the actual file's contents, only - a stream of BlameEntry tuples. + Unlike :meth:`blame`, this does not return the actual file's contents, only a + stream of :class:`BlameEntry` tuples. - :param rev: revision specifier, see git-rev-parse for viable options. - :return: lazy iterator of BlameEntry tuples, where the commit - indicates the commit to blame for the line, and range - indicates a span of line numbers in the resulting file. + :param rev: + Revision specifier. If ``None``, the blame will include all the latest + uncommitted changes. Otherwise, anything successfully parsed by + :manpage:`git-rev-parse(1)` is a valid option. + + :return: + Lazy iterator of :class:`BlameEntry` tuples, where the commit indicates the + commit to blame for the line, and range indicates a span of line numbers in + the resulting file. - If you combine all line number ranges outputted by this command, you - should get a continuous range spanning all line numbers in the file. + If you combine all line number ranges outputted by this command, you should get + a continuous range spanning all line numbers in the file. """ data: bytes = self.git.blame(rev, "--", file, p=True, incremental=True, stdout_as_string=False, **kwargs) @@ -917,7 +1077,8 @@ class Repo(object): stream = (line for line in data.split(b"\n") if line) while True: try: - line = next(stream) # when exhausted, causes a StopIteration, terminating this function + # When exhausted, causes a StopIteration, terminating this function. + line = next(stream) except StopIteration: return split_line = line.split() @@ -926,8 +1087,8 @@ class Repo(object): num_lines = int(num_lines_b) orig_lineno = int(orig_lineno_b) if hexsha not in commits: - # Now read the next few lines and build up a dict of properties - # for this commit + # Now read the next few lines and build up a dict of properties for this + # commit. 
props: Dict[bytes, bytes] = {} while True: try: @@ -935,14 +1096,14 @@ class Repo(object): except StopIteration: return if line == b"boundary": - # "boundary" indicates a root commit and occurs - # instead of the "previous" tag + # "boundary" indicates a root commit and occurs instead of the + # "previous" tag. continue tag, value = line.split(b" ", 1) props[tag] = value if tag == b"filename": - # "filename" formally terminates the entry for --incremental + # "filename" formally terminates the entry for --incremental. orig_filename = value break @@ -962,11 +1123,12 @@ class Repo(object): ) commits[hexsha] = c else: - # Discard all lines until we find "filename" which is - # guaranteed to be the last line + # Discard all lines until we find "filename" which is guaranteed to be + # the last line. while True: try: - line = next(stream) # will fail if we reach the EOF unexpectedly + # Will fail if we reach the EOF unexpectedly. + line = next(stream) except StopIteration: return tag, value = line.split(b" ", 1) @@ -983,7 +1145,7 @@ class Repo(object): def blame( self, - rev: Union[str, HEAD], + rev: Union[str, HEAD, None], file: str, incremental: bool = False, rev_opts: Optional[List[str]] = None, @@ -991,12 +1153,19 @@ class Repo(object): ) -> List[List[Commit | List[str | bytes] | None]] | Iterator[BlameEntry] | None: """The blame information for the given file at the given revision. - :param rev: revision specifier, see git-rev-parse for viable options. + :param rev: + Revision specifier. If ``None``, the blame will include all the latest + uncommitted changes. Otherwise, anything successfully parsed by + :manpage:`git-rev-parse(1)` is a valid option. + :return: list: [git.Commit, list: [<line>]] - A list of lists associating a Commit object with a list of lines that - changed within the given commit. 
The Commit objects will be given in order - of appearance.""" + + A list of lists associating a :class:`~git.objects.commit.Commit` object + with a list of lines that changed within the given commit. The + :class:`~git.objects.commit.Commit` objects will be given in order of + appearance. + """ if incremental: return self.blame_incremental(rev, file, **kwargs) rev_opts = rev_opts or [] @@ -1027,14 +1196,15 @@ class Repo(object): parts = [] is_binary = True else: - # As we don't have an idea when the binary data ends, as it could contain multiple newlines - # in the process. So we rely on being able to decode to tell us what is is. - # This can absolutely fail even on text files, but even if it does, we should be fine treating it - # as binary instead + # As we don't have an idea when the binary data ends, as it could + # contain multiple newlines in the process. So we rely on being able to + # decode to tell us what it is. This can absolutely fail even on text + # files, but even if it does, we should be fine treating it as binary + # instead. parts = self.re_whitespace.split(line_str, 1) firstpart = parts[0] is_binary = False - # end handle decode of line + # END handle decode of line if self.re_hexsha_only.search(firstpart): # handles @@ -1111,10 +1281,12 @@ class Repo(object): line = line_str else: line = line_bytes - # NOTE: We are actually parsing lines out of binary data, which can lead to the - # binary being split up along the newline separator. We will append this to the - # blame we are currently looking at, even though it should be concatenated with - # the last line we have seen. + # NOTE: We are actually parsing lines out of binary + # data, which can lead to the binary being split up + # along the newline separator. We will append this + # to the blame we are currently looking at, even + # though it should be concatenated with the last + # line we have seen. 
blames[-1][1].append(line) info = {"id": sha} @@ -1133,32 +1305,35 @@ class Repo(object): expand_vars: bool = True, **kwargs: Any, ) -> "Repo": - """Initialize a git repository at the given path if specified + """Initialize a git repository at the given path if specified. :param path: - is the full path to the repo (traditionally ends with /<name>.git) - or None in which case the repository will be created in the current - working directory + The full path to the repo (traditionally ends with ``/<name>.git``). Or + ``None``, in which case the repository will be created in the current + working directory. :param mkdir: - if specified will create the repository directory if it doesn't - already exists. Creates the directory with a mode=0755. - Only effective if a path is explicitly given + If specified, will create the repository directory if it doesn't already + exist. Creates the directory with a mode=0755. + Only effective if a path is explicitly given. :param odbt: - Object DataBase type - a type which is constructed by providing - the directory containing the database objects, i.e. .git/objects. - It will be used to access all object data + Object DataBase type - a type which is constructed by providing the + directory containing the database objects, i.e. ``.git/objects``. It will be + used to access all object data. :param expand_vars: - if specified, environment variables will not be escaped. This - can lead to information disclosure, allowing attackers to - access the contents of environment variables + If specified, environment variables will not be escaped. This can lead to + information disclosure, allowing attackers to access the contents of + environment variables. :param kwargs: - keyword arguments serving as additional options to the git-init command + Keyword arguments serving as additional options to the + :manpage:`git-init(1)` command. 
- :return: ``git.Repo`` (the newly created repo)""" + :return: + :class:`Repo` (the newly created repo) + """ if path: path = expand_path(path, expand_vars) if mkdir and path and not osp.exists(path): @@ -1184,7 +1359,7 @@ class Repo(object): ) -> "Repo": odbt = kwargs.pop("odbt", odb_default_type) - # when pathlib.Path or other classbased path is passed + # When pathlib.Path or other class-based path is passed if not isinstance(path, str): path = str(path) @@ -1233,24 +1408,23 @@ class Repo(object): cmdline = getattr(proc, "args", "") cmdline = remove_password_if_present(cmdline) - log.debug("Cmd(%s)'s unused stdout: %s", cmdline, stdout) + _logger.debug("Cmd(%s)'s unused stdout: %s", cmdline, stdout) finalize_process(proc, stderr=stderr) - # our git command could have a different working dir than our actual - # environment, hence we prepend its working dir if required + # Our git command could have a different working dir than our actual + # environment, hence we prepend its working dir if required. if not osp.isabs(path): path = osp.join(git._working_dir, path) if git._working_dir is not None else path repo = cls(path, odbt=odbt) - # retain env values that were passed to _clone() + # Retain env values that were passed to _clone(). repo.git.update_environment(**git.environment()) - # adjust remotes - there may be operating systems which use backslashes, - # These might be given as initial paths, but when handling the config file - # that contains the remote from which we were clones, git stops liking it - # as it will escape the backslashes. Hence we undo the escaping just to be - # sure + # Adjust remotes - there may be operating systems which use backslashes, These + # might be given as initial paths, but when handling the config file that + # contains the remote from which we were clones, git stops liking it as it will + # escape the backslashes. Hence we undo the escaping just to be sure. 
if repo.remotes: with repo.remotes[0].config_writer as writer: writer.set_value("url", Git.polish_url(repo.remotes[0].url)) @@ -1268,20 +1442,41 @@ class Repo(object): ) -> "Repo": """Create a clone from this repository. - :param path: is the full path of the new repo (traditionally ends with ./<name>.git). - :param progress: See 'git.remote.Remote.push'. - :param multi_options: A list of Clone options that can be provided multiple times. One - option per list item which is passed exactly as specified to clone. - For example ['--config core.filemode=false', '--config core.ignorecase', - '--recurse-submodule=repo1_path', '--recurse-submodule=repo2_path'] - :param allow_unsafe_protocols: Allow unsafe protocols to be used, like ext - :param allow_unsafe_options: Allow unsafe options to be used, like --upload-pack + :param path: + The full path of the new repo (traditionally ends with ``./<name>.git``). + + :param progress: + See :meth:`Remote.push <git.remote.Remote.push>`. + + :param multi_options: + A list of :manpage:`git-clone(1)` options that can be provided multiple + times. + + One option per list item which is passed exactly as specified to clone. + For example:: + + [ + "--config core.filemode=false", + "--config core.ignorecase", + "--recurse-submodule=repo1_path", + "--recurse-submodule=repo2_path", + ] + + :param allow_unsafe_protocols: + Allow unsafe protocols to be used, like ``ext``. + + :param allow_unsafe_options: + Allow unsafe options to be used, like ``--upload-pack``. + :param kwargs: - * odbt = ObjectDatabase Type, allowing to determine the object database - implementation used by the returned Repo instance - * All remaining keyword arguments are given to the git-clone command + * ``odbt`` = ObjectDatabase Type, allowing to determine the object database + implementation used by the returned :class:`Repo` instance. + * All remaining keyword arguments are given to the :manpage:`git-clone(1)` + command. 
- :return: ``git.Repo`` (the newly cloned repo)""" + :return: + :class:`Repo` (the newly cloned repo) + """ return self._clone( self.git, self.common_dir, @@ -1306,22 +1501,40 @@ class Repo(object): allow_unsafe_options: bool = False, **kwargs: Any, ) -> "Repo": - """Create a clone from the given URL - - :param url: valid git url, see http://www.kernel.org/pub/software/scm/git/docs/git-clone.html#URLS - :param to_path: Path to which the repository should be cloned to - :param progress: See 'git.remote.Remote.push'. - :param env: Optional dictionary containing the desired environment variables. - Note: Provided variables will be used to update the execution - environment for `git`. If some variable is not specified in `env` - and is defined in `os.environ`, value from `os.environ` will be used. - If you want to unset some variable, consider providing empty string - as its value. - :param multi_options: See ``clone`` method - :param allow_unsafe_protocols: Allow unsafe protocols to be used, like ext - :param allow_unsafe_options: Allow unsafe options to be used, like --upload-pack - :param kwargs: see the ``clone`` method - :return: Repo instance pointing to the cloned directory""" + """Create a clone from the given URL. + + :param url: + Valid git url, see: https://git-scm.com/docs/git-clone#URLS + + :param to_path: + Path to which the repository should be cloned to. + + :param progress: + See :meth:`Remote.push <git.remote.Remote.push>`. + + :param env: + Optional dictionary containing the desired environment variables. + + Note: Provided variables will be used to update the execution environment + for ``git``. If some variable is not specified in `env` and is defined in + :attr:`os.environ`, value from :attr:`os.environ` will be used. If you want + to unset some variable, consider providing empty string as its value. + + :param multi_options: + See the :meth:`clone` method. + + :param allow_unsafe_protocols: + Allow unsafe protocols to be used, like ``ext``. 
+ + :param allow_unsafe_options: + Allow unsafe options to be used, like ``--upload-pack``. + + :param kwargs: + See the :meth:`clone` method. + + :return: + :class:`Repo` instance pointing to the cloned directory. + """ git = cls.GitCommandWrapperType(os.getcwd()) if env is not None: git.update_environment(**env) @@ -1346,18 +1559,30 @@ class Repo(object): ) -> Repo: """Archive the tree at the given revision. - :param ostream: file compatible stream object to which the archive will be written as bytes - :param treeish: is the treeish name/id, defaults to active branch - :param prefix: is the optional prefix to prepend to each filename in the archive - :param kwargs: Additional arguments passed to git-archive + :param ostream: + File-compatible stream object to which the archive will be written as bytes. + + :param treeish: + The treeish name/id, defaults to active branch. + + :param prefix: + The optional prefix to prepend to each filename in the archive. + + :param kwargs: + Additional arguments passed to :manpage:`git-archive(1)`: + + * Use the ``format`` argument to define the kind of format. Use specialized + ostreams to write any format supported by Python. + * You may specify the special ``path`` keyword, which may either be a + repository-relative path to a directory or file to place into the archive, + or a list or tuple of multiple paths. - * Use the 'format' argument to define the kind of format. Use - specialized ostreams to write any format supported by python. - * You may specify the special **path** keyword, which may either be a repository-relative - path to a directory or file to place into the archive, or a list or tuple of multiple paths. + :raise git.exc.GitCommandError: + If something went wrong. 
- :raise GitCommandError: in case something went wrong - :return: self""" + :return: + self + """ if treeish is None: treeish = self.head.commit if prefix and "prefix" not in kwargs: @@ -1367,22 +1592,27 @@ class Repo(object): path = cast(Union[PathLike, List[PathLike], Tuple[PathLike, ...]], path) if not isinstance(path, (tuple, list)): path = [path] - # end assure paths is list + # END ensure paths is list (or tuple) self.git.archive("--", treeish, *path, **kwargs) return self def has_separate_working_tree(self) -> bool: """ - :return: True if our git_dir is not at the root of our working_tree_dir, but a .git file with a - platform agnositic symbolic link. Our git_dir will be wherever the .git file points to - :note: bare repositories will always return False here + :return: + True if our :attr:`git_dir` is not at the root of our + :attr:`working_tree_dir`, but a ``.git`` file with a platform-agnostic + symbolic link. Our :attr:`git_dir` will be wherever the ``.git`` file points + to. + + :note: + Bare repositories will always return ``False`` here. """ if self.bare: return False if self.working_tree_dir: return osp.isfile(osp.join(self.working_tree_dir, ".git")) else: - return False # or raise Error? + return False # Or raise Error? rev_parse = rev_parse @@ -1392,9 +1622,10 @@ class Repo(object): def currently_rebasing_on(self) -> Commit | None: """ - :return: The commit which is currently being replayed while rebasing. + :return: + The commit which is currently being replayed while rebasing. - None if we are not currently rebasing. + ``None`` if we are not currently rebasing. 
""" if self.git_dir: rebase_head_file = osp.join(self.git_dir, "REBASE_HEAD") diff --git a/git/repo/fun.py b/git/repo/fun.py index ae35aa8..182cf82 100644 --- a/git/repo/fun.py +++ b/git/repo/fun.py @@ -1,48 +1,51 @@ -"""Package with general repository related functions""" +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ + +"""General repository-related functions.""" + from __future__ import annotations + +__all__ = [ + "rev_parse", + "is_git_dir", + "touch", + "find_submodule_git_dir", + "name_to_object", + "short_to_long", + "deref_tag", + "to_commit", + "find_worktree_git_dir", +] + import os -import stat +import os.path as osp from pathlib import Path +import stat from string import digits +from gitdb.exc import BadName, BadObject + +from git.cmd import Git from git.exc import WorkTreeRepositoryUnsupported from git.objects import Object from git.refs import SymbolicReference -from git.util import hex_to_bin, bin_to_hex, cygpath -from gitdb.exc import ( - BadObject, - BadName, -) - -import os.path as osp -from git.cmd import Git +from git.util import cygpath, bin_to_hex, hex_to_bin # Typing ---------------------------------------------------------------------- -from typing import Union, Optional, cast, TYPE_CHECKING -from git.types import Commit_ish +from typing import Optional, TYPE_CHECKING, Union, cast, overload + +from git.types import AnyGitObject, Literal, PathLike if TYPE_CHECKING: - from git.types import PathLike - from .base import Repo from git.db import GitCmdObjectDB + from git.objects import Commit, TagObject from git.refs.reference import Reference - from git.objects import Commit, TagObject, Blob, Tree from git.refs.tag import Tag -# ---------------------------------------------------------------------------- + from .base import Repo -__all__ = ( - "rev_parse", - "is_git_dir", - "touch", - "find_submodule_git_dir", - "name_to_object", - "short_to_long", - 
"deref_tag", - "to_commit", - "find_worktree_git_dir", -) +# ---------------------------------------------------------------------------- def touch(filename: str) -> str: @@ -51,14 +54,14 @@ def touch(filename: str) -> str: return filename -def is_git_dir(d: "PathLike") -> bool: - """This is taken from the git setup.c:is_git_directory - function. +def is_git_dir(d: PathLike) -> bool: + """This is taken from the git setup.c:is_git_directory function. - @throws WorkTreeRepositoryUnsupported if it sees a worktree directory. It's quite hacky to do that here, - but at least clearly indicates that we don't support it. - There is the unlikely danger to throw if we see directories which just look like a worktree dir, - but are none.""" + :raise git.exc.WorkTreeRepositoryUnsupported: + If it sees a worktree directory. It's quite hacky to do that here, but at least + clearly indicates that we don't support it. There is the unlikely danger to + throw if we see directories which just look like a worktree dir, but are none. + """ if osp.isdir(d): if (osp.isdir(osp.join(d, "objects")) or "GIT_OBJECT_DIRECTORY" in os.environ) and osp.isdir( osp.join(d, "refs") @@ -74,7 +77,7 @@ def is_git_dir(d: "PathLike") -> bool: return False -def find_worktree_git_dir(dotgit: "PathLike") -> Optional[str]: +def find_worktree_git_dir(dotgit: PathLike) -> Optional[str]: """Search for a gitdir for this worktree.""" try: statbuf = os.stat(dotgit) @@ -93,7 +96,7 @@ def find_worktree_git_dir(dotgit: "PathLike") -> Optional[str]: return None -def find_submodule_git_dir(d: "PathLike") -> Optional["PathLike"]: +def find_submodule_git_dir(d: PathLike) -> Optional[PathLike]: """Search for a submodule repo.""" if is_git_dir(d): return d @@ -102,28 +105,33 @@ def find_submodule_git_dir(d: "PathLike") -> Optional["PathLike"]: with open(d) as fp: content = fp.read().rstrip() except IOError: - # it's probably not a file + # It's probably not a file. 
pass else: if content.startswith("gitdir: "): path = content[8:] if Git.is_cygwin(): - ## Cygwin creates submodules prefixed with `/cygdrive/...` suffixes. - # Cygwin git understands Cygwin paths much better than Windows ones + # Cygwin creates submodules prefixed with `/cygdrive/...`. + # Cygwin git understands Cygwin paths much better than Windows ones. # Also the Cygwin tests are assuming Cygwin paths. path = cygpath(path) if not osp.isabs(path): path = osp.normpath(osp.join(osp.dirname(d), path)) return find_submodule_git_dir(path) - # end handle exception + # END handle exception return None def short_to_long(odb: "GitCmdObjectDB", hexsha: str) -> Optional[bytes]: - """:return: long hexadecimal sha1 from the given less-than-40 byte hexsha - or None if no candidate could be found. - :param hexsha: hexsha with less than 40 byte""" + """ + :return: + Long hexadecimal sha1 from the given less than 40 byte hexsha, or ``None`` if no + candidate could be found. + + :param hexsha: + hexsha with less than 40 bytes. + """ try: return bin_to_hex(odb.partial_to_complete_sha_hex(hexsha)) except BadObject: @@ -131,29 +139,39 @@ def short_to_long(odb: "GitCmdObjectDB", hexsha: str) -> Optional[bytes]: # END exception handling -def name_to_object( - repo: "Repo", name: str, return_ref: bool = False -) -> Union[SymbolicReference, "Commit", "TagObject", "Blob", "Tree"]: +@overload +def name_to_object(repo: "Repo", name: str, return_ref: Literal[False] = ...) -> AnyGitObject: ... + + +@overload +def name_to_object(repo: "Repo", name: str, return_ref: Literal[True]) -> Union[AnyGitObject, SymbolicReference]: ... + + +def name_to_object(repo: "Repo", name: str, return_ref: bool = False) -> Union[AnyGitObject, SymbolicReference]: """ - :return: object specified by the given name, hexshas ( short and long ) - as well as references are supported - :param return_ref: if name specifies a reference, we will return the reference - instead of the object. 
Otherwise it will raise BadObject or BadName + :return: + Object specified by the given name - hexshas (short and long) as well as + references are supported. + + :param return_ref: + If ``True``, and name specifies a reference, we will return the reference + instead of the object. Otherwise it will raise :exc:`~gitdb.exc.BadObject` or + :exc:`~gitdb.exc.BadName`. """ hexsha: Union[None, str, bytes] = None - # is it a hexsha ? Try the most common ones, which is 7 to 40 + # Is it a hexsha? Try the most common ones, which is 7 to 40. if repo.re_hexsha_shortened.match(name): if len(name) != 40: - # find long sha for short sha + # Find long sha for short sha. hexsha = short_to_long(repo.odb, name) else: hexsha = name # END handle short shas # END find sha if it matches - # if we couldn't find an object for what seemed to be a short hexsha - # try to find it as reference anyway, it could be named 'aaa' for instance + # If we couldn't find an object for what seemed to be a short hexsha, try to find it + # as reference anyway, it could be named 'aaa' for instance. if hexsha is None: for base in ( "%s", @@ -174,12 +192,12 @@ def name_to_object( # END for each base # END handle hexsha - # didn't find any ref, this is an error + # Didn't find any ref, this is an error. if return_ref: raise BadObject("Couldn't find reference named %r" % name) # END handle return ref - # tried everything ? fail + # Tried everything ? fail. 
if hexsha is None: raise BadName(name) # END assert hexsha was found @@ -187,8 +205,8 @@ def name_to_object( return Object.new_from_sha(repo, hex_to_bin(hexsha)) -def deref_tag(tag: "Tag") -> "TagObject": - """Recursively dereference a tag and return the resulting object""" +def deref_tag(tag: "Tag") -> AnyGitObject: + """Recursively dereference a tag and return the resulting object.""" while True: try: tag = tag.object @@ -198,8 +216,8 @@ def deref_tag(tag: "Tag") -> "TagObject": return tag -def to_commit(obj: Object) -> Union["Commit", "TagObject"]: - """Convert the given object to a commit if possible and return it""" +def to_commit(obj: Object) -> "Commit": + """Convert the given object to a commit if possible and return it.""" if obj.type == "tag": obj = deref_tag(obj) @@ -209,23 +227,39 @@ def to_commit(obj: Object) -> Union["Commit", "TagObject"]: return obj -def rev_parse(repo: "Repo", rev: str) -> Union["Commit", "Tag", "Tree", "Blob"]: +def rev_parse(repo: "Repo", rev: str) -> AnyGitObject: + """Parse a revision string. Like :manpage:`git-rev-parse(1)`. + + :return: + `~git.objects.base.Object` at the given revision. + + This may be any type of git object: + + * :class:`Commit <git.objects.commit.Commit>` + * :class:`TagObject <git.objects.tag.TagObject>` + * :class:`Tree <git.objects.tree.Tree>` + * :class:`Blob <git.objects.blob.Blob>` + + :param rev: + :manpage:`git-rev-parse(1)`-compatible revision specification as string. + Please see :manpage:`git-rev-parse(1)` for details. + + :raise gitdb.exc.BadObject: + If the given revision could not be found. + + :raise ValueError: + If `rev` couldn't be parsed. + + :raise IndexError: + If an invalid reflog index is specified. 
""" - :return: Object at the given revision, either Commit, Tag, Tree or Blob - :param rev: git-rev-parse compatible revision specification as string, please see - http://www.kernel.org/pub/software/scm/git/docs/git-rev-parse.html - for details - :raise BadObject: if the given revision could not be found - :raise ValueError: If rev couldn't be parsed - :raise IndexError: If invalid reflog index is specified""" - - # colon search mode ? + # Are we in colon search mode? if rev.startswith(":/"): - # colon search mode - raise NotImplementedError("commit by message search ( regex )") + # Colon search mode + raise NotImplementedError("commit by message search (regex)") # END handle search - obj: Union[Commit_ish, "Reference", None] = None + obj: Optional[AnyGitObject] = None ref = None output_type = "commit" start = 0 @@ -240,19 +274,17 @@ def rev_parse(repo: "Repo", rev: str) -> Union["Commit", "Tag", "Tree", "Blob"]: token = rev[start] if obj is None: - # token is a rev name + # token is a rev name. if start == 0: ref = repo.head.ref else: if token == "@": ref = cast("Reference", name_to_object(repo, rev[:start], return_ref=True)) else: - obj = cast(Commit_ish, name_to_object(repo, rev[:start])) + obj = name_to_object(repo, rev[:start]) # END handle token # END handle refname else: - assert obj is not None - if ref is not None: obj = cast("Commit", ref.commit) # END handle ref @@ -260,29 +292,29 @@ def rev_parse(repo: "Repo", rev: str) -> Union["Commit", "Tag", "Tree", "Blob"]: start += 1 - # try to parse {type} + # Try to parse {type}. if start < lr and rev[start] == "{": end = rev.find("}", start) if end == -1: raise ValueError("Missing closing brace to define type in %s" % rev) - output_type = rev[start + 1 : end] # exclude brace + output_type = rev[start + 1 : end] # Exclude brace. - # handle type + # Handle type. if output_type == "commit": - pass # default + pass # Default. 
elif output_type == "tree": try: - obj = cast(Commit_ish, obj) + obj = cast(AnyGitObject, obj) obj = to_commit(obj).tree except (AttributeError, ValueError): - pass # error raised later + pass # Error raised later. # END exception handling elif output_type in ("", "blob"): obj = cast("TagObject", obj) if obj and obj.type == "tag": obj = deref_tag(obj) else: - # cannot do anything for non-tags + # Cannot do anything for non-tags. pass # END handle tag elif token == "@": @@ -290,11 +322,10 @@ def rev_parse(repo: "Repo", rev: str) -> Union["Commit", "Tag", "Tree", "Blob"]: assert ref is not None, "Require Reference to access reflog" revlog_index = None try: - # transform reversed index into the format of our revlog + # Transform reversed index into the format of our revlog. revlog_index = -(int(output_type) + 1) except ValueError as e: - # TODO: Try to parse the other date options, using parse_date - # maybe + # TODO: Try to parse the other date options, using parse_date maybe. raise NotImplementedError("Support for additional @{...} modes not implemented") from e # END handle revlog index @@ -306,23 +337,24 @@ def rev_parse(repo: "Repo", rev: str) -> Union["Commit", "Tag", "Tree", "Blob"]: obj = Object.new_from_sha(repo, hex_to_bin(entry.newhexsha)) - # make it pass the following checks + # Make it pass the following checks. output_type = "" else: raise ValueError("Invalid output type: %s ( in %s )" % (output_type, rev)) # END handle output type - # empty output types don't require any specific type, its just about dereferencing tags + # Empty output types don't require any specific type, its just about + # dereferencing tags. if output_type and obj and obj.type != output_type: raise ValueError("Could not accommodate requested object type %r, got %s" % (output_type, obj.type)) # END verify output type - start = end + 1 # skip brace + start = end + 1 # Skip brace. parsed_to = start continue # END parse type - # try to parse a number + # Try to parse a number. 
num = 0 if token != ":": found_digit = False @@ -336,17 +368,16 @@ def rev_parse(repo: "Repo", rev: str) -> Union["Commit", "Tag", "Tree", "Blob"]: # END handle number # END number parse loop - # no explicit number given, 1 is the default - # It could be 0 though + # No explicit number given, 1 is the default. It could be 0 though. if not found_digit: num = 1 # END set default num # END number parsing only if non-blob mode parsed_to = start - # handle hierarchy walk + # Handle hierarchy walk. try: - obj = cast(Commit_ish, obj) + obj = cast(AnyGitObject, obj) if token == "~": obj = to_commit(obj) for _ in range(num): @@ -354,7 +385,7 @@ def rev_parse(repo: "Repo", rev: str) -> Union["Commit", "Tag", "Tree", "Blob"]: # END for each history item to walk elif token == "^": obj = to_commit(obj) - # must be n'th parent + # Must be n'th parent. if num: obj = obj.parents[num - 1] elif token == ":": @@ -373,9 +404,9 @@ def rev_parse(repo: "Repo", rev: str) -> Union["Commit", "Tag", "Tree", "Blob"]: # END exception handling # END parse loop - # still no obj ? Its probably a simple name + # Still no obj? It's probably a simple name. 
if obj is None: - obj = cast(Commit_ish, name_to_object(repo, rev)) + obj = name_to_object(repo, rev) parsed_to = lr # END handle simple name diff --git a/git/types.py b/git/types.py index 21276b5..cce1845 100644 --- a/git/types.py +++ b/git/types.py @@ -1,101 +1,263 @@ -# -*- coding: utf-8 -*- -# This module is part of GitPython and is released under -# the BSD License: https://opensource.org/license/bsd-3-clause/ -# flake8: noqa +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ import os import sys from typing import ( + Any, + Callable, Dict, + List, NoReturn, + Optional, Sequence as Sequence, Tuple, - Union, - Any, - Optional, - Callable, TYPE_CHECKING, + Type, TypeVar, -) # noqa: F401 + Union, +) +import warnings if sys.version_info >= (3, 8): from typing import ( Literal, - TypedDict, Protocol, SupportsIndex as SupportsIndex, + TypedDict, runtime_checkable, - ) # noqa: F401 + ) else: from typing_extensions import ( Literal, + Protocol, SupportsIndex as SupportsIndex, TypedDict, - Protocol, runtime_checkable, - ) # noqa: F401 - -# if sys.version_info >= (3, 10): -# from typing import TypeGuard # noqa: F401 -# else: -# from typing_extensions import TypeGuard # noqa: F401 - -PathLike = Union[str, "os.PathLike[str]"] + ) if TYPE_CHECKING: - from git.repo import Repo from git.objects import Commit, Tree, TagObject, Blob + from git.repo import Repo - # from git.refs import SymbolicReference +PathLike = Union[str, "os.PathLike[str]"] +"""A :class:`str` (Unicode) based file or directory path.""" TBD = Any +"""Alias of :class:`~typing.Any`, when a type hint is meant to become more specific.""" + _T = TypeVar("_T") +"""Type variable used internally in GitPython.""" + +AnyGitObject = Union["Commit", "Tree", "TagObject", "Blob"] +"""Union of the :class:`~git.objects.base.Object`-based types that represent actual git +object types. 
+ +As noted in :class:`~git.objects.base.Object`, which has further details, these are: + +* :class:`Blob <git.objects.blob.Blob>` +* :class:`Tree <git.objects.tree.Tree>` +* :class:`Commit <git.objects.commit.Commit>` +* :class:`TagObject <git.objects.tag.TagObject>` + +Those GitPython classes represent the four git object types, per +:manpage:`gitglossary(7)`: + +* "blob": https://git-scm.com/docs/gitglossary#def_blob_object +* "tree object": https://git-scm.com/docs/gitglossary#def_tree_object +* "commit object": https://git-scm.com/docs/gitglossary#def_commit_object +* "tag object": https://git-scm.com/docs/gitglossary#def_tag_object + +For more general information on git objects and their types as git understands them: + +* "object": https://git-scm.com/docs/gitglossary#def_object +* "object type": https://git-scm.com/docs/gitglossary#def_object_type + +:note: + See also the :class:`Tree_ish` and :class:`Commit_ish` unions. +""" + +Tree_ish = Union["Commit", "Tree", "TagObject"] +"""Union of :class:`~git.objects.base.Object`-based types that are typically tree-ish. + +See :manpage:`gitglossary(7)` on "tree-ish": +https://git-scm.com/docs/gitglossary#def_tree-ish + +:note: + :class:`~git.objects.tree.Tree` and :class:`~git.objects.commit.Commit` are the + classes whose instances are all tree-ish. This union includes them, but also + :class:`~git.objects.tag.TagObject`, only **most** of whose instances are tree-ish. + Whether a particular :class:`~git.objects.tag.TagObject` peels (recursively + dereferences) to a tree or commit, rather than a blob, can in general only be known + at runtime. In practice, git tag objects are nearly always used for tagging commits, + and such tags are tree-ish because commits are tree-ish. + +:note: + See also the :class:`AnyGitObject` union of all four classes corresponding to git + object types. 
+""" + +Commit_ish = Union["Commit", "TagObject"] +"""Union of :class:`~git.objects.base.Object`-based types that are typically commit-ish. + +See :manpage:`gitglossary(7)` on "commit-ish": +https://git-scm.com/docs/gitglossary#def_commit-ish + +:note: + :class:`~git.objects.commit.Commit` is the only class whose instances are all + commit-ish. This union type includes :class:`~git.objects.commit.Commit`, but also + :class:`~git.objects.tag.TagObject`, only **most** of whose instances are + commit-ish. Whether a particular :class:`~git.objects.tag.TagObject` peels + (recursively dereferences) to a commit, rather than a tree or blob, can in general + only be known at runtime. In practice, git tag objects are nearly always used for + tagging commits, and such tags are of course commit-ish. + +:note: + See also the :class:`AnyGitObject` union of all four classes corresponding to git + object types. +""" + +GitObjectTypeString = Literal["commit", "tag", "blob", "tree"] +"""Literal strings identifying git object types and the +:class:`~git.objects.base.Object`-based types that represent them. + +See the :attr:`Object.type <git.objects.base.Object.type>` attribute. These are its +values in :class:`~git.objects.base.Object` subclasses that represent git objects. These +literals therefore correspond to the types in the :class:`AnyGitObject` union. + +These are the same strings git itself uses to identify its four object types. +See :manpage:`gitglossary(7)` on "object type": +https://git-scm.com/docs/gitglossary#def_object_type +""" + +Lit_commit_ish: Type[Literal["commit", "tag"]] +"""Deprecated. Type of literal strings identifying typically-commitish git object types. + +Prior to a bugfix, this type had been defined more broadly. Any usage is in practice +ambiguous and likely to be incorrect. This type has therefore been made a static type +error to appear in annotations. 
It is preserved, with a deprecated status, to avoid +introducing runtime errors in code that refers to it, but it should not be used. + +Instead of this type: + +* For the type of the string literals associated with :class:`Commit_ish`, use + ``Literal["commit", "tag"]`` or create a new type alias for it. That is equivalent to + this type as currently defined (but usable in statically checked type annotations). + +* For the type of all four string literals associated with :class:`AnyGitObject`, use + :class:`GitObjectTypeString`. That is equivalent to the old definition of this type + prior to the bugfix (and is also usable in statically checked type annotations). +""" + + +def _getattr(name: str) -> Any: + if name != "Lit_commit_ish": + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") + + warnings.warn( + "Lit_commit_ish is deprecated. It is currently defined as " + '`Literal["commit", "tag"]`, which should be used in its place if desired. It ' + 'had previously been defined as `Literal["commit", "tag", "blob", "tree"]`, ' + "covering all four git object type strings including those that are never " + "commit-ish. For that, use the GitObjectTypeString type instead.", + DeprecationWarning, + stacklevel=2, + ) + return Literal["commit", "tag"] + + +if not TYPE_CHECKING: # Preserve static checking for undefined/misspelled attributes. + __getattr__ = _getattr + + +def __dir__() -> List[str]: + return [*globals(), "Lit_commit_ish"] -Tree_ish = Union["Commit", "Tree"] -Commit_ish = Union["Commit", "TagObject", "Blob", "Tree"] -Lit_commit_ish = Literal["commit", "tag", "blob", "tree"] # Config_levels --------------------------------------------------------- Lit_config_levels = Literal["system", "global", "user", "repository"] +"""Type of literal strings naming git configuration levels. + +These strings relate to which file a git configuration variable is in. 
+""" + +ConfigLevels_Tup = Tuple[Literal["system"], Literal["user"], Literal["global"], Literal["repository"]] +"""Static type of a tuple of the four strings representing configuration levels.""" # Progress parameter type alias ----------------------------------------- CallableProgress = Optional[Callable[[int, Union[str, float], Union[str, float, None], str], None]] +"""General type of a function or other callable used as a progress reporter for cloning. -# def is_config_level(inp: str) -> TypeGuard[Lit_config_levels]: -# # return inp in get_args(Lit_config_level) # only py >= 3.8 -# return inp in ("system", "user", "global", "repository") +This is the type of a function or other callable that reports the progress of a clone, +when passed as a ``progress`` argument to :meth:`Repo.clone <git.repo.base.Repo.clone>` +or :meth:`Repo.clone_from <git.repo.base.Repo.clone_from>`. +:note: + Those :meth:`~git.repo.base.Repo.clone` and :meth:`~git.repo.base.Repo.clone_from` + methods also accept :meth:`~git.util.RemoteProgress` instances, including instances + of its :meth:`~git.util.CallableRemoteProgress` subclass. -ConfigLevels_Tup = Tuple[Literal["system"], Literal["user"], Literal["global"], Literal["repository"]] +:note: + Unlike objects that match this type, :meth:`~git.util.RemoteProgress` instances are + not directly callable, not even when they are instances of + :meth:`~git.util.CallableRemoteProgress`, which wraps a callable and forwards + information to it but is not itself callable. + +:note: + This type also allows ``None``, for cloning without reporting progress. +""" # ----------------------------------------------------------------------------------- def assert_never(inp: NoReturn, raise_error: bool = True, exc: Union[Exception, None] = None) -> None: - """For use in exhaustive checking of literal or Enum in if/else chain. - Should only be reached if all members not handled OR attempt to pass non-members through chain. 
+ """For use in exhaustive checking of a literal or enum in if/else chains. + + A call to this function should only be reached if not all members are handled, or if + an attempt is made to pass non-members through the chain. - If all members handled, type is Empty. Otherwise, will cause mypy error. - If non-members given, should cause mypy error at variable creation. + :param inp: + If all members are handled, the argument for `inp` will have the + :class:`~typing.Never`/:class:`~typing.NoReturn` type. + Otherwise, the type will mismatch and cause a mypy error. - If raise_error is True, will also raise AssertionError or the Exception passed to exc. + :param raise_error: + If ``True``, will also raise :exc:`ValueError` with a general + "unhandled literal" message, or the exception object passed as `exc`. + + :param exc: + If not ``None``, this should be an already-constructed exception object, to be + raised if `raise_error` is ``True``. """ if raise_error: if exc is None: - raise ValueError(f"An unhandled Literal ({inp}) in an if/else chain was found") + raise ValueError(f"An unhandled literal ({inp!r}) in an if/else chain was found") else: raise exc class Files_TD(TypedDict): + """Dictionary with stat counts for the diff of a particular file. + + For the :class:`~git.util.Stats.files` attribute of :class:`~git.util.Stats` + objects. + """ + insertions: int deletions: int lines: int + change_type: str class Total_TD(TypedDict): + """Dictionary with total stats from any number of files. + + For the :class:`~git.util.Stats.total` attribute of :class:`~git.util.Stats` + objects. 
+ """ + insertions: int deletions: int lines: int @@ -103,15 +265,21 @@ class Total_TD(TypedDict): class HSH_TD(TypedDict): + """Dictionary carrying the same information as a :class:`~git.util.Stats` object.""" + total: Total_TD files: Dict[PathLike, Files_TD] @runtime_checkable class Has_Repo(Protocol): + """Protocol for having a :attr:`repo` attribute, the repository to operate on.""" + repo: "Repo" @runtime_checkable class Has_id_attribute(Protocol): + """Protocol for having :attr:`_id_attribute_` used in iteration and traversal.""" + _id_attribute_: str diff --git a/git/util.py b/git/util.py index 48901ba..9e8ac82 100644 --- a/git/util.py +++ b/git/util.py @@ -1,28 +1,67 @@ -# utils.py # Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors # -# This module is part of GitPython and is released under -# the BSD License: https://opensource.org/license/bsd-3-clause/ +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ + +import sys + +__all__ = [ + "stream_copy", + "join_path", + "to_native_path_linux", + "join_path_native", + "Stats", + "IndexFileSHA1Writer", + "IterableObj", + "IterableList", + "BlockingLockFile", + "LockFile", + "Actor", + "get_user_id", + "assure_directory_exists", + "RemoteProgress", + "CallableRemoteProgress", + "rmtree", + "unbare_repo", + "HIDE_WINDOWS_KNOWN_ERRORS", +] + +if sys.platform == "win32": + __all__.append("to_native_path_windows") from abc import abstractmethod -import os.path as osp -from .compat import is_win import contextlib from functools import wraps import getpass import logging import os +import os.path as osp +import pathlib import platform -import subprocess import re import shutil import stat -from sys import maxsize +import subprocess import time from urllib.parse import urlsplit, urlunsplit import warnings -# from git.objects.util import Traversable +# NOTE: Unused imports can be improved now that CI testing has 
fully resumed. Some of +# these be used indirectly through other GitPython modules, which avoids having to write +# gitdb all the time in their imports. They are not in __all__, at least currently, +# because they could be removed or changed at any time, and so should not be considered +# conceptually public to code outside GitPython. Linters of course do not like it. +from gitdb.util import ( + LazyMixin, # noqa: F401 + LockedFD, # noqa: F401 + bin_to_hex, # noqa: F401 + file_contents_ro, # noqa: F401 + file_contents_ro_filepath, # noqa: F401 + hex_to_bin, # noqa: F401 + make_sha, + to_bin_sha, # noqa: F401 + to_hex_sha, # noqa: F401 +) # typing --------------------------------------------------------- @@ -40,90 +79,83 @@ from typing import ( Pattern, Sequence, Tuple, + TYPE_CHECKING, TypeVar, Union, cast, - TYPE_CHECKING, overload, ) -import pathlib - if TYPE_CHECKING: + from git.cmd import Git + from git.config import GitConfigParser, SectionConstraint from git.remote import Remote from git.repo.base import Repo - from git.config import GitConfigParser, SectionConstraint - from git import Git - - # from git.objects.base import IndexObject - -from .types import ( +from git.types import ( + Files_TD, + Has_id_attribute, + HSH_TD, Literal, - SupportsIndex, - Protocol, - runtime_checkable, # because behind py version guards PathLike, - HSH_TD, + Protocol, + SupportsIndex, Total_TD, - Files_TD, # aliases - Has_id_attribute, + runtime_checkable, ) +# --------------------------------------------------------------------- + T_IterableObj = TypeVar("T_IterableObj", bound=Union["IterableObj", "Has_id_attribute"], covariant=True) -# So IterableList[Head] is subtype of IterableList[IterableObj] +# So IterableList[Head] is subtype of IterableList[IterableObj]. 
-# --------------------------------------------------------------------- +_logger = logging.getLogger(__name__) -from gitdb.util import ( # NOQA @IgnorePep8 - make_sha, - LockedFD, # @UnusedImport - file_contents_ro, # @UnusedImport - file_contents_ro_filepath, # @UnusedImport - LazyMixin, # @UnusedImport - to_hex_sha, # @UnusedImport - to_bin_sha, # @UnusedImport - bin_to_hex, # @UnusedImport - hex_to_bin, # @UnusedImport -) +def _read_env_flag(name: str, default: bool) -> bool: + """Read a boolean flag from an environment variable. + :return: + The flag, or the `default` value if absent or ambiguous. + """ + try: + value = os.environ[name] + except KeyError: + return default -# NOTE: Some of the unused imports might be used/imported by others. -# Handle once test-cases are back up and running. -# Most of these are unused here, but are for use by git-python modules so these -# don't see gitdb all the time. Flake of course doesn't like it. -__all__ = [ - "stream_copy", - "join_path", - "to_native_path_linux", - "join_path_native", - "Stats", - "IndexFileSHA1Writer", - "IterableObj", - "IterableList", - "BlockingLockFile", - "LockFile", - "Actor", - "get_user_id", - "assure_directory_exists", - "RemoteProgress", - "CallableRemoteProgress", - "rmtree", - "unbare_repo", - "HIDE_WINDOWS_KNOWN_ERRORS", -] + _logger.warning( + "The %s environment variable is deprecated. Its effect has never been documented and changes without warning.", + name, + ) + + adjusted_value = value.strip().lower() + + if adjusted_value in {"", "0", "false", "no"}: + return False + if adjusted_value in {"1", "true", "yes"}: + return True + _logger.warning("%s has unrecognized value %r, treating as %r.", name, value, default) + return default + + +def _read_win_env_flag(name: str, default: bool) -> bool: + """Read a boolean flag from an environment variable on Windows. -log = logging.getLogger(__name__) + :return: + On Windows, the flag, or the `default` value if absent or ambiguous. 
+ On all other operating systems, ``False``. -# types############################################################ + :note: + This only accesses the environment on Windows. + """ + return sys.platform == "win32" and _read_env_flag(name, default) #: We need an easy way to see if Appveyor TCs start failing, #: so the errors marked with this var are considered "acknowledged" ones, awaiting remedy, #: till then, we wish to hide them. -HIDE_WINDOWS_KNOWN_ERRORS = is_win and os.environ.get("HIDE_WINDOWS_KNOWN_ERRORS", True) -HIDE_WINDOWS_FREEZE_ERRORS = is_win and os.environ.get("HIDE_WINDOWS_FREEZE_ERRORS", True) +HIDE_WINDOWS_KNOWN_ERRORS = _read_win_env_flag("HIDE_WINDOWS_KNOWN_ERRORS", True) +HIDE_WINDOWS_FREEZE_ERRORS = _read_win_env_flag("HIDE_WINDOWS_FREEZE_ERRORS", True) # { Utility Methods @@ -131,8 +163,8 @@ T = TypeVar("T") def unbare_repo(func: Callable[..., T]) -> Callable[..., T]: - """Methods with this decorator raise :class:`.exc.InvalidGitRepositoryError` if they - encounter a bare repository""" + """Methods with this decorator raise :exc:`~git.exc.InvalidGitRepositoryError` if + they encounter a bare repository.""" from .exc import InvalidGitRepositoryError @@ -152,8 +184,9 @@ def unbare_repo(func: Callable[..., T]) -> Callable[..., T]: def cwd(new_dir: PathLike) -> Generator[PathLike, None, None]: """Context manager to temporarily change directory. - This is similar to contextlib.chdir introduced in Python 3.11, but the context - manager object returned by a single call to this function is not reentrant.""" + This is similar to :func:`contextlib.chdir` introduced in Python 3.11, but the + context manager object returned by a single call to this function is not reentrant. + """ old_dir = os.getcwd() os.chdir(new_dir) try: @@ -177,40 +210,54 @@ def patch_env(name: str, value: str) -> Generator[None, None, None]: def rmtree(path: PathLike) -> None: - """Remove the given recursively. + """Remove the given directory tree recursively. 
+ + :note: + We use :func:`shutil.rmtree` but adjust its behaviour to see whether files that + couldn't be deleted are read-only. Windows will not remove them in that case. + """ - :note: we use shutil rmtree but adjust its behaviour to see whether files that - couldn't be deleted are read-only. Windows will not remove them in that case""" + def handler(function: Callable, path: PathLike, _excinfo: Any) -> None: + """Callback for :func:`shutil.rmtree`. - def onerror(func: Callable, path: PathLike, exc_info: str) -> None: - # Is the error an access error ? + This works as either a ``onexc`` or ``onerror`` style callback. + """ + # Is the error an access error? os.chmod(path, stat.S_IWUSR) try: - func(path) # Will scream if still not possible to delete. - except Exception as ex: + function(path) + except PermissionError as ex: if HIDE_WINDOWS_KNOWN_ERRORS: from unittest import SkipTest - raise SkipTest("FIXME: fails with: PermissionError\n {}".format(ex)) from ex + raise SkipTest(f"FIXME: fails with: PermissionError\n {ex}") from ex raise - return shutil.rmtree(path, False, onerror) + if sys.platform != "win32": + shutil.rmtree(path) + elif sys.version_info >= (3, 12): + shutil.rmtree(path, onexc=handler) + else: + shutil.rmtree(path, onerror=handler) def rmfile(path: PathLike) -> None: - """Ensure file deleted also on *Windows* where read-only files need special treatment.""" + """Ensure file deleted also on *Windows* where read-only files need special + treatment.""" if osp.isfile(path): - if is_win: + if sys.platform == "win32": os.chmod(path, 0o777) os.remove(path) def stream_copy(source: BinaryIO, destination: BinaryIO, chunk_size: int = 512 * 1024) -> int: - """Copy all data from the source stream into the destination stream in chunks - of size chunk_size + """Copy all data from the `source` stream into the `destination` stream in chunks + of size `chunk_size`. 
- :return: amount of bytes written""" + :return: + Number of bytes written + """ br = 0 while True: chunk = source.read(chunk_size) @@ -223,8 +270,8 @@ def stream_copy(source: BinaryIO, destination: BinaryIO, chunk_size: int = 512 * def join_path(a: PathLike, *p: PathLike) -> PathLike: - """Join path tokens together similar to osp.join, but always use - '/' instead of possibly '\' on windows.""" + R"""Join path tokens together similar to osp.join, but always use ``/`` instead of + possibly ``\`` on Windows.""" path = str(a) for b in p: b = str(b) @@ -240,7 +287,7 @@ def join_path(a: PathLike, *p: PathLike) -> PathLike: return path -if is_win: +if sys.platform == "win32": def to_native_path_windows(path: PathLike) -> PathLike: path = str(path) @@ -250,10 +297,9 @@ if is_win: path = str(path) return path.replace("\\", "/") - __all__.append("to_native_path_windows") to_native_path = to_native_path_windows else: - # no need for any work on linux + # No need for any work on Linux. def to_native_path_linux(path: PathLike) -> str: return str(path) @@ -261,19 +307,24 @@ else: def join_path_native(a: PathLike, *p: PathLike) -> PathLike: + R"""Like :func:`join_path`, but makes sure an OS native path is returned. + + This is only needed to play it safe on Windows and to ensure nice paths that only + use ``\``. """ - As join path, but makes sure an OS native path is returned. This is only - needed to play it safe on my dear windows and to assure nice paths that only - use '\'""" return to_native_path(join_path(a, *p)) def assure_directory_exists(path: PathLike, is_file: bool = False) -> bool: - """Assure that the directory pointed to by path exists. + """Make sure that the directory pointed to by path exists. + + :param is_file: + If ``True``, `path` is assumed to be a file and handled correctly. + Otherwise it must be a directory. - :param is_file: If True, path is assumed to be a file and handled correctly. 
- Otherwise it must be a directory - :return: True if the directory was created, False if it already existed""" + :return: + ``True`` if the directory was created, ``False`` if it already existed. + """ if is_file: path = osp.dirname(path) # END handle file @@ -285,12 +336,27 @@ def assure_directory_exists(path: PathLike, is_file: bool = False) -> bool: def _get_exe_extensions() -> Sequence[str]: PATHEXT = os.environ.get("PATHEXT", None) - return ( - tuple(p.upper() for p in PATHEXT.split(os.pathsep)) if PATHEXT else (".BAT", "COM", ".EXE") if is_win else ("") - ) + if PATHEXT: + return tuple(p.upper() for p in PATHEXT.split(os.pathsep)) + elif sys.platform == "win32": + return (".BAT", ".COM", ".EXE") + else: + return () def py_where(program: str, path: Optional[PathLike] = None) -> List[str]: + """Perform a path search to assist :func:`is_cygwin_git`. + + This is not robust for general use. It is an implementation detail of + :func:`is_cygwin_git`. When a search following all shell rules is needed, + :func:`shutil.which` can be used instead. + + :note: + Neither this function nor :func:`shutil.which` will predict the effect of an + executable search on a native Windows system due to a :class:`subprocess.Popen` + call without ``shell=True``, because shell and non-shell executable search on + Windows differ considerably. + """ # From: http://stackoverflow.com/a/377028/548792 winprog_exts = _get_exe_extensions() @@ -298,7 +364,9 @@ def py_where(program: str, path: Optional[PathLike] = None) -> List[str]: return ( osp.isfile(fpath) and os.access(fpath, os.X_OK) - and (os.name != "nt" or not winprog_exts or any(fpath.upper().endswith(ext) for ext in winprog_exts)) + and ( + sys.platform != "win32" or not winprog_exts or any(fpath.upper().endswith(ext) for ext in winprog_exts) + ) ) progs = [] @@ -349,9 +417,9 @@ _cygpath_parsers: Tuple[Tuple[Pattern[str], Callable, bool], ...] 
= ( def cygpath(path: str) -> str: - """Use :meth:`git.cmd.Git.polish_url()` instead, that works on any environment.""" - path = str(path) # ensure is str and not AnyPath. - # Fix to use Paths when 3.5 dropped. or to be just str if only for urls? + """Use :meth:`git.cmd.Git.polish_url` instead, that works on any environment.""" + path = str(path) # Ensure is str and not AnyPath. + # Fix to use Paths when 3.5 dropped. Or to be just str if only for URLs? if not path.startswith(("/cygdrive", "//", "/proc/cygdrive")): for regex, parser, recurse in _cygpath_parsers: match = regex.match(path) @@ -384,26 +452,7 @@ def decygpath(path: PathLike) -> str: _is_cygwin_cache: Dict[str, Optional[bool]] = {} -@overload -def is_cygwin_git(git_executable: None) -> Literal[False]: - ... - - -@overload -def is_cygwin_git(git_executable: PathLike) -> bool: - ... - - -def is_cygwin_git(git_executable: Union[None, PathLike]) -> bool: - if is_win: - # is_win seems to be true only for Windows-native pythons - # cygwin has os.name = posix, I think - return False - - if git_executable is None: - return False - - git_executable = str(git_executable) +def _is_cygwin_git(git_executable: str) -> bool: is_cygwin = _is_cygwin_cache.get(git_executable) # type: Optional[bool] if is_cygwin is None: is_cygwin = False @@ -420,31 +469,48 @@ def is_cygwin_git(git_executable: Union[None, PathLike]) -> bool: # retcode = process.poll() is_cygwin = "CYGWIN" in uname_out except Exception as ex: - log.debug("Failed checking if running in CYGWIN due to: %r", ex) + _logger.debug("Failed checking if running in CYGWIN due to: %r", ex) _is_cygwin_cache[git_executable] = is_cygwin return is_cygwin +@overload +def is_cygwin_git(git_executable: None) -> Literal[False]: ... + + +@overload +def is_cygwin_git(git_executable: PathLike) -> bool: ... + + +def is_cygwin_git(git_executable: Union[None, PathLike]) -> bool: + if sys.platform == "win32": # TODO: See if we can use `sys.platform != "cygwin"`. 
+ return False + elif git_executable is None: + return False + else: + return _is_cygwin_git(str(git_executable)) + + def get_user_id() -> str: - """:return: string identifying the currently active system user as name@node""" + """:return: String identifying the currently active system user as ``name@node``""" return "%s@%s" % (getpass.getuser(), platform.node()) def finalize_process(proc: Union[subprocess.Popen, "Git.AutoInterrupt"], **kwargs: Any) -> None: - """Wait for the process (clone, fetch, pull or push) and handle its errors accordingly""" + """Wait for the process (clone, fetch, pull or push) and handle its errors + accordingly.""" # TODO: No close proc-streams?? proc.wait(**kwargs) @overload -def expand_path(p: None, expand_vars: bool = ...) -> None: - ... +def expand_path(p: None, expand_vars: bool = ...) -> None: ... @overload def expand_path(p: PathLike, expand_vars: bool = ...) -> str: - # improve these overloads when 3.5 dropped + # TODO: Support for Python 3.5 has been dropped, so these overloads can be improved. ... @@ -452,20 +518,19 @@ def expand_path(p: Union[None, PathLike], expand_vars: bool = True) -> Optional[ if isinstance(p, pathlib.Path): return p.resolve() try: - p = osp.expanduser(p) # type: ignore + p = osp.expanduser(p) # type: ignore[arg-type] if expand_vars: - p = osp.expandvars(p) # type: ignore - return osp.normpath(osp.abspath(p)) # type: ignore + p = osp.expandvars(p) + return osp.normpath(osp.abspath(p)) except Exception: return None def remove_password_if_present(cmdline: Sequence[str]) -> List[str]: - """ - Parse any command line argument and if on of the element is an URL with a + """Parse any command line argument and if one of the elements is an URL with a username and/or password, replace them by stars (in-place). - If nothing found just returns the command line as-is. + If nothing is found, this just returns the command line as-is. 
This should be used for every log line that print a command line, as well as exception messages. @@ -475,7 +540,7 @@ def remove_password_if_present(cmdline: Sequence[str]) -> List[str]: new_cmdline.append(to_parse) try: url = urlsplit(to_parse) - # Remove password from the URL if present + # Remove password from the URL if present. if url.password is None and url.username is None: continue @@ -485,7 +550,7 @@ def remove_password_if_present(cmdline: Sequence[str]) -> List[str]: url = url._replace(netloc=url.netloc.replace(url.username, "*****")) new_cmdline[index] = urlunsplit(url) except ValueError: - # This is not a valid URL + # This is not a valid URL. continue return new_cmdline @@ -495,11 +560,10 @@ def remove_password_if_present(cmdline: Sequence[str]) -> List[str]: # { Classes -class RemoteProgress(object): - """ - Handler providing an interface to parse progress information emitted by git-push - and git-fetch and to dispatch callbacks allowing subclasses to react to the progress. - """ +class RemoteProgress: + """Handler providing an interface to parse progress information emitted by + :manpage:`git-push(1)` and :manpage:`git-fetch(1)` and to dispatch callbacks + allowing subclasses to react to the progress.""" _num_op_codes: int = 9 ( @@ -523,8 +587,8 @@ class RemoteProgress(object): "_cur_line", "_seen_ops", "error_lines", # Lines that started with 'error:' or 'fatal:'. - "other_lines", - ) # Lines not denoting progress (i.e.g. push-infos). + "other_lines", # Lines not denoting progress (e.g. push-infos). + ) re_op_absolute = re.compile(r"(remote: )?([\w\s]+):\s+()(\d+)()(.*)") re_op_relative = re.compile(r"(remote: )?([\w\s]+):\s+(\d+)% \((\d+)/(\d+)\)(.*)") @@ -535,18 +599,19 @@ class RemoteProgress(object): self.other_lines: List[str] = [] def _parse_progress_line(self, line: AnyStr) -> None: - """Parse progress information from the given line as retrieved by git-push - or git-fetch. 
+ """Parse progress information from the given line as retrieved by + :manpage:`git-push(1)` or :manpage:`git-fetch(1)`. - Lines that do not contain progress info are stored in :attr:`other_lines`. - - Lines that seem to contain an error (i.e. start with error: or fatal:) are stored - in :attr:`error_lines`.""" + - Lines that seem to contain an error (i.e. start with ``error:`` or ``fatal:``) + are stored in :attr:`error_lines`. + """ # handle # Counting objects: 4, done. # Compressing objects: 50% (1/2) # Compressing objects: 100% (2/2) # Compressing objects: 100% (2/2), done. - if isinstance(line, bytes): # mypy argues about ternary assignment + if isinstance(line, bytes): # mypy argues about ternary assignment. line_str = line.decode("utf-8") else: line_str = line @@ -556,20 +621,6 @@ class RemoteProgress(object): self.error_lines.append(self._cur_line) return - # find escape characters and cut them away - regex will not work with - # them as they are non-ascii. As git might expect a tty, it will send them - last_valid_index = None - for i, c in enumerate(reversed(line_str)): - if ord(c) < 32: - # its a slice index - last_valid_index = -i - 1 - # END character was non-ascii - # END for each character in line - if last_valid_index is not None: - line_str = line_str[:last_valid_index] - # END cut away invalid part - line_str = line_str.rstrip() - cur_count, max_count = None, None match = self.re_op_relative.match(line_str) if match is None: @@ -584,7 +635,7 @@ class RemoteProgress(object): op_code = 0 _remote, op_name, _percent, cur_count, max_count, message = match.groups() - # get operation id + # Get operation ID. if op_name == "Counting objects": op_code |= self.COUNTING elif op_name == "Compressing objects": @@ -600,7 +651,7 @@ class RemoteProgress(object): elif op_name == "Checking out files": op_code |= self.CHECKING_OUT else: - # Note: On windows it can happen that partial lines are sent + # Note: On Windows it can happen that partial lines are sent. 
# Hence we get something like "CompreReceiving objects", which is # a blend of "Compressing objects" and "Receiving objects". # This can't really be prevented, so we drop the line verbosely @@ -608,11 +659,11 @@ class RemoteProgress(object): # commands at some point. self.line_dropped(line_str) # Note: Don't add this line to the other lines, as we have to silently - # drop it - return None + # drop it. + return # END handle op code - # figure out stage + # Figure out stage. if op_code not in self._seen_ops: self._seen_ops.append(op_code) op_code |= self.BEGIN @@ -639,13 +690,15 @@ class RemoteProgress(object): def new_message_handler(self) -> Callable[[str], None]: """ :return: - a progress handler suitable for handle_process_output(), passing lines on to this Progress - handler in a suitable format""" + A progress handler suitable for :func:`~git.cmd.handle_process_output`, + passing lines on to this progress handler in a suitable format. + """ def handler(line: AnyStr) -> None: return self._parse_progress_line(line.rstrip()) - # end + # END handler + return handler def line_dropped(self, line: str) -> None: @@ -659,56 +712,69 @@ class RemoteProgress(object): max_count: Union[str, float, None] = None, message: str = "", ) -> None: - """Called whenever the progress changes + """Called whenever the progress changes. :param op_code: Integer allowing to be compared against Operation IDs and stage IDs. - Stage IDs are BEGIN and END. BEGIN will only be set once for each Operation - ID as well as END. It may be that BEGIN and END are set at once in case only - one progress message was emitted due to the speed of the operation. - Between BEGIN and END, none of these flags will be set + Stage IDs are :const:`BEGIN` and :const:`END`. :const:`BEGIN` will only be + set once for each Operation ID as well as :const:`END`. It may be that + :const:`BEGIN` and :const:`END` are set at once in case only one progress + message was emitted due to the speed of the operation. 
Between + :const:`BEGIN` and :const:`END`, none of these flags will be set. - Operation IDs are all held within the OP_MASK. Only one Operation ID will - be active per call. - :param cur_count: Current absolute count of items + Operation IDs are all held within the :const:`OP_MASK`. Only one Operation + ID will be active per call. + + :param cur_count: + Current absolute count of items. :param max_count: - The maximum count of items we expect. It may be None in case there is - no maximum number of items or if it is (yet) unknown. + The maximum count of items we expect. It may be ``None`` in case there is no + maximum number of items or if it is (yet) unknown. :param message: - In case of the 'WRITING' operation, it contains the amount of bytes + In case of the :const:`WRITING` operation, it contains the amount of bytes transferred. It may possibly be used for other purposes as well. - You may read the contents of the current line in self._cur_line""" + :note: + You may read the contents of the current line in + :attr:`self._cur_line <_cur_line>`. + """ pass class CallableRemoteProgress(RemoteProgress): - """An implementation forwarding updates to any callable""" + """A :class:`RemoteProgress` implementation forwarding updates to any callable. - __slots__ = "_callable" + :note: + Like direct instances of :class:`RemoteProgress`, instances of this + :class:`CallableRemoteProgress` class are not themselves directly callable. + Rather, instances of this class wrap a callable and forward to it. This should + therefore not be confused with :class:`git.types.CallableProgress`. + """ + + __slots__ = ("_callable",) def __init__(self, fn: Callable) -> None: self._callable = fn - super(CallableRemoteProgress, self).__init__() + super().__init__() def update(self, *args: Any, **kwargs: Any) -> None: self._callable(*args, **kwargs) -class Actor(object): - """Actors hold information about a person acting on the repository. 
They - can be committers and authors or anything with a name and an email as - mentioned in the git log entries.""" +class Actor: + """Actors hold information about a person acting on the repository. They can be + committers and authors or anything with a name and an email as mentioned in the git + log entries.""" # PRECOMPILED REGEX name_only_regex = re.compile(r"<(.*)>") name_email_regex = re.compile(r"(.*) <(.*?)>") # ENVIRONMENT VARIABLES - # read when creating new commits + # These are read when creating new commits. env_author_name = "GIT_AUTHOR_NAME" env_author_email = "GIT_AUTHOR_EMAIL" env_committer_name = "GIT_COMMITTER_NAME" @@ -741,12 +807,16 @@ class Actor(object): @classmethod def _from_string(cls, string: str) -> "Actor": - """Create an Actor from a string. - :param string: is the string, which is expected to be in regular git format + """Create an :class:`Actor` from a string. + + :param string: + The string, which is expected to be in regular git format:: John Doe <jdoe@example.com> - :return: Actor""" + :return: + :class:`Actor` + """ m = cls.name_email_regex.search(string) if m: name, email = m.groups() @@ -755,7 +825,7 @@ class Actor(object): m = cls.name_only_regex.search(string) if m: return Actor(m.group(1), None) - # assume best and use the whole string as name + # Assume the best and use the whole string as name. return Actor(string, None) # END special case name # END handle name/email matching @@ -768,7 +838,7 @@ class Actor(object): config_reader: Union[None, "GitConfigParser", "SectionConstraint"] = None, ) -> "Actor": actor = Actor("", "") - user_id = None # We use this to avoid multiple calls to getpass.getuser() + user_id = None # We use this to avoid multiple calls to getpass.getuser(). 
def default_email() -> str: nonlocal user_id @@ -803,28 +873,30 @@ class Actor(object): @classmethod def committer(cls, config_reader: Union[None, "GitConfigParser", "SectionConstraint"] = None) -> "Actor": """ - :return: Actor instance corresponding to the configured committer. It behaves - similar to the git implementation, such that the environment will override - configuration values of config_reader. If no value is set at all, it will be - generated - :param config_reader: ConfigReader to use to retrieve the values from in case - they are not set in the environment""" + :return: + :class:`Actor` instance corresponding to the configured committer. It + behaves similar to the git implementation, such that the environment will + override configuration values of `config_reader`. If no value is set at all, + it will be generated. + + :param config_reader: + ConfigReader to use to retrieve the values from in case they are not set in + the environment. + """ return cls._main_actor(cls.env_committer_name, cls.env_committer_email, config_reader) @classmethod def author(cls, config_reader: Union[None, "GitConfigParser", "SectionConstraint"] = None) -> "Actor": - """Same as committer(), but defines the main author. It may be specified in the environment, - but defaults to the committer""" + """Same as :meth:`committer`, but defines the main author. It may be specified + in the environment, but defaults to the committer.""" return cls._main_actor(cls.env_author_name, cls.env_author_email, config_reader) -class Stats(object): - - """ - Represents stat information as presented by git at the end of a merge. It is +class Stats: + """Represents stat information as presented by git at the end of a merge. It is created from the output of a diff operation. 
- ``Example``:: + Example:: c = Commit( sha1 ) s = c.stats @@ -838,31 +910,36 @@ class Stats(object): deletions = number of deleted lines as int insertions = number of inserted lines as int lines = total number of lines changed as int, or deletions + insertions + change_type = type of change as str, A|C|D|M|R|T|U|X|B ``full-stat-dict`` In addition to the items in the stat-dict, it features additional information:: - files = number of changed files as int""" + files = number of changed files as int + """ __slots__ = ("total", "files") - def __init__(self, total: Total_TD, files: Dict[PathLike, Files_TD]): + def __init__(self, total: Total_TD, files: Dict[PathLike, Files_TD]) -> None: self.total = total self.files = files @classmethod def _list_from_string(cls, repo: "Repo", text: str) -> "Stats": - """Create a Stat object from output retrieved by git-diff. + """Create a :class:`Stats` object from output retrieved by + :manpage:`git-diff(1)`. - :return: git.Stat""" + :return: + :class:`git.Stats` + """ hsh: HSH_TD = { "total": {"insertions": 0, "deletions": 0, "lines": 0, "files": 0}, "files": {}, } for line in text.splitlines(): - (raw_insertions, raw_deletions, filename) = line.split("\t") + (change_type, raw_insertions, raw_deletions, filename) = line.split("\t") insertions = raw_insertions != "-" and int(raw_insertions) or 0 deletions = raw_deletions != "-" and int(raw_deletions) or 0 hsh["total"]["insertions"] += insertions @@ -873,20 +950,22 @@ class Stats(object): "insertions": insertions, "deletions": deletions, "lines": insertions + deletions, + "change_type": change_type, } hsh["files"][filename.strip()] = files_dict return Stats(hsh["total"], hsh["files"]) -class IndexFileSHA1Writer(object): - - """Wrapper around a file-like object that remembers the SHA1 of - the data written to it. It will write a sha when the stream is closed - or if the asked for explicitly using write_sha. 
+class IndexFileSHA1Writer: + """Wrapper around a file-like object that remembers the SHA1 of the data written to + it. It will write a sha when the stream is closed or if asked for explicitly using + :meth:`write_sha`. - Only useful to the indexfile + Only useful to the index file. - :note: Based on the dulwich project""" + :note: + Based on the dulwich project. + """ __slots__ = ("f", "sha1") @@ -912,14 +991,14 @@ class IndexFileSHA1Writer(object): return self.f.tell() -class LockFile(object): - +class LockFile: """Provides methods to obtain, check for, and release a file based lock which should be used to handle concurrent access to the same file. As we are a utility class to be derived from, we only use protected methods. - Locks will automatically be released on destruction""" + Locks will automatically be released on destruction. + """ __slots__ = ("_file_path", "_owns_lock") @@ -935,14 +1014,22 @@ class LockFile(object): return "%s.lock" % (self._file_path) def _has_lock(self) -> bool: - """:return: True if we have a lock and if the lockfile still exists - :raise AssertionError: if our lock-file does not exist""" + """ + :return: + True if we have a lock and if the lockfile still exists + + :raise AssertionError: + If our lock-file does not exist. + """ return self._owns_lock def _obtain_lock_or_raise(self) -> None: - """Create a lock file as flag for other instances, mark our instance as lock-holder + """Create a lock file as flag for other instances, mark our instance as + lock-holder. - :raise IOError: if a lock was already present or a lock file could not be written""" + :raise IOError: + If a lock was already present or a lock file could not be written. + """ if self._has_lock(): return lock_file = self._lock_file_path() @@ -962,16 +1049,18 @@ class LockFile(object): def _obtain_lock(self) -> None: """The default implementation will raise if a lock cannot be obtained. 
- Subclasses may override this method to provide a different implementation""" + + Subclasses may override this method to provide a different implementation. + """ return self._obtain_lock_or_raise() def _release_lock(self) -> None: - """Release our lock if we have one""" + """Release our lock if we have one.""" if not self._has_lock(): return - # if someone removed our file beforhand, lets just flag this issue - # instead of failing, to make it more usable. + # If someone removed our file beforehand, lets just flag this issue instead of + # failing, to make it more usable. lfp = self._lock_file_path() try: rmfile(lfp) @@ -981,13 +1070,13 @@ class LockFile(object): class BlockingLockFile(LockFile): + """The lock file will block until a lock could be obtained, or fail after a + specified timeout. - """The lock file will block until a lock could be obtained, or fail after - a specified timeout. - - :note: If the directory containing the lock was removed, an exception will - be raised during the blocking period, preventing hangs as the lock - can never be obtained.""" + :note: + If the directory containing the lock was removed, an exception will be raised + during the blocking period, preventing hangs as the lock can never be obtained. + """ __slots__ = ("_check_interval", "_max_block_time") @@ -995,28 +1084,32 @@ class BlockingLockFile(LockFile): self, file_path: PathLike, check_interval_s: float = 0.3, - max_block_time_s: int = maxsize, + max_block_time_s: int = sys.maxsize, ) -> None: - """Configure the instance + """Configure the instance. :param check_interval_s: Period of time to sleep until the lock is checked the next time. - By default, it waits a nearly unlimited time + By default, it waits a nearly unlimited time. - :param max_block_time_s: Maximum amount of seconds we may lock""" - super(BlockingLockFile, self).__init__(file_path) + :param max_block_time_s: + Maximum amount of seconds we may lock. 
+ """ + super().__init__(file_path) self._check_interval = check_interval_s self._max_block_time = max_block_time_s def _obtain_lock(self) -> None: - """This method blocks until it obtained the lock, or raises IOError if - it ran out of time or if the parent directory was not available anymore. - If this method returns, you are guaranteed to own the lock""" + """This method blocks until it obtained the lock, or raises :exc:`IOError` if it + ran out of time or if the parent directory was not available anymore. + + If this method returns, you are guaranteed to own the lock. + """ starttime = time.time() maxtime = starttime + float(self._max_block_time) while True: try: - super(BlockingLockFile, self)._obtain_lock() + super()._obtain_lock() except IOError as e: # synity check: if the directory leading to the lockfile is not # readable anymore, raise an exception @@ -1043,36 +1136,45 @@ class BlockingLockFile(LockFile): class IterableList(List[T_IterableObj]): - - """ - List of iterable objects allowing to query an object by id or by named index:: + """List of iterable objects allowing to query an object by id or by named index:: heads = repo.heads heads.master heads['master'] heads[0] - Iterable parent objects = [Commit, SubModule, Reference, FetchInfo, PushInfo] - Iterable via inheritance = [Head, TagReference, RemoteReference] - ] - It requires an id_attribute name to be set which will be queried from its + Iterable parent objects: + + * :class:`Commit <git.objects.Commit>` + * :class:`Submodule <git.objects.submodule.base.Submodule>` + * :class:`Reference <git.refs.reference.Reference>` + * :class:`FetchInfo <git.remote.FetchInfo>` + * :class:`PushInfo <git.remote.PushInfo>` + + Iterable via inheritance: + + * :class:`Head <git.refs.head.Head>` + * :class:`TagReference <git.refs.tag.TagReference>` + * :class:`RemoteReference <git.refs.remote.RemoteReference>` + + This requires an ``id_attribute`` name to be set which will be queried from its contained items to have 
a means for comparison. - A prefix can be specified which is to be used in case the id returned by the - items always contains a prefix that does not matter to the user, so it - can be left out.""" + A prefix can be specified which is to be used in case the id returned by the items + always contains a prefix that does not matter to the user, so it can be left out. + """ __slots__ = ("_id_attr", "_prefix") def __new__(cls, id_attr: str, prefix: str = "") -> "IterableList[T_IterableObj]": - return super(IterableList, cls).__new__(cls) + return super().__new__(cls) def __init__(self, id_attr: str, prefix: str = "") -> None: self._id_attr = id_attr self._prefix = prefix def __contains__(self, attr: object) -> bool: - # first try identity match for performance + # First try identity match for performance. try: rval = list.__contains__(self, attr) if rval: @@ -1081,9 +1183,9 @@ class IterableList(List[T_IterableObj]): pass # END handle match - # otherwise make a full name search + # Otherwise make a full name search. try: - getattr(self, cast(str, attr)) # use cast to silence mypy + getattr(self, cast(str, attr)) # Use cast to silence mypy. return True except (AttributeError, TypeError): return False @@ -1097,7 +1199,7 @@ class IterableList(List[T_IterableObj]): # END for each item return list.__getattribute__(self, attr) - def __getitem__(self, index: Union[SupportsIndex, int, slice, str]) -> T_IterableObj: # type: ignore + def __getitem__(self, index: Union[SupportsIndex, int, slice, str]) -> T_IterableObj: # type: ignore[override] assert isinstance(index, (int, str, slice)), "Index of IterableList should be an int or str" if isinstance(index, int): @@ -1131,89 +1233,114 @@ class IterableList(List[T_IterableObj]): list.__delitem__(self, delindex) +@runtime_checkable +class IterableObj(Protocol): + """Defines an interface for iterable items, so there is a uniform way to retrieve + and iterate items within the git repository. 
+ + Subclasses: + + * :class:`Submodule <git.objects.submodule.base.Submodule>` + * :class:`Commit <git.objects.Commit>` + * :class:`Reference <git.refs.reference.Reference>` + * :class:`PushInfo <git.remote.PushInfo>` + * :class:`FetchInfo <git.remote.FetchInfo>` + * :class:`Remote <git.remote.Remote>` + """ + + __slots__ = () + + _id_attribute_: str + + @classmethod + @abstractmethod + def iter_items(cls, repo: "Repo", *args: Any, **kwargs: Any) -> Iterator[T_IterableObj]: + # Return-typed to be compatible with subtypes e.g. Remote. + """Find (all) items of this type. + + Subclasses can specify `args` and `kwargs` differently, and may use them for + filtering. However, when the method is called with no additional positional or + keyword arguments, subclasses are obliged to to yield all items. + + :return: + Iterator yielding Items + """ + raise NotImplementedError("To be implemented by Subclass") + + @classmethod + def list_items(cls, repo: "Repo", *args: Any, **kwargs: Any) -> IterableList[T_IterableObj]: + """Find (all) items of this type and collect them into a list. + + For more information about the arguments, see :meth:`iter_items`. + + :note: + Favor the :meth:`iter_items` method as it will avoid eagerly collecting all + items. When there are many items, that can slow performance and increase + memory usage. + + :return: + list(Item,...) list of item instances + """ + out_list: IterableList = IterableList(cls._id_attribute_) + out_list.extend(cls.iter_items(repo, *args, **kwargs)) + return out_list + + class IterableClassWatcher(type): - """Metaclass that watches""" + """Metaclass that issues :exc:`DeprecationWarning` when :class:`git.util.Iterable` + is subclassed.""" def __init__(cls, name: str, bases: Tuple, clsdict: Dict) -> None: for base in bases: if type(base) is IterableClassWatcher: warnings.warn( - f"GitPython Iterable subclassed by {name}. 
" - "Iterable is deprecated due to naming clash since v3.1.18" - " and will be removed in 3.1.20, " - "Use IterableObj instead \n", + f"GitPython Iterable subclassed by {name}." + " Iterable is deprecated due to naming clash since v3.1.18" + " and will be removed in 4.0.0." + " Use IterableObj instead.", DeprecationWarning, stacklevel=2, ) class Iterable(metaclass=IterableClassWatcher): + """Deprecated, use :class:`IterableObj` instead. - """Defines an interface for iterable items which is to assure a uniform - way to retrieve and iterate items within the git repository""" + Defines an interface for iterable items, so there is a uniform way to retrieve + and iterate items within the git repository. + """ __slots__ = () + _id_attribute_ = "attribute that most suitably identifies your instance" @classmethod - def list_items(cls, repo: "Repo", *args: Any, **kwargs: Any) -> Any: - """ - Deprecated, use IterableObj instead. - Find all items of this type - subclasses can specify args and kwargs differently. - If no args are given, subclasses are obliged to return all items if no additional - arguments arg given. + def iter_items(cls, repo: "Repo", *args: Any, **kwargs: Any) -> Any: + """Deprecated, use :class:`IterableObj` instead. - :note: Favor the iter_items method as it will + Find (all) items of this type. - :return: list(Item,...) list of item instances""" - out_list: Any = IterableList(cls._id_attribute_) - out_list.extend(cls.iter_items(repo, *args, **kwargs)) - return out_list + See :meth:`IterableObj.iter_items` for details on usage. - @classmethod - def iter_items(cls, repo: "Repo", *args: Any, **kwargs: Any) -> Any: - # return typed to be compatible with subtypes e.g. 
Remote - """For more information about the arguments, see list_items - :return: iterator yielding Items""" + :return: + Iterator yielding Items + """ raise NotImplementedError("To be implemented by Subclass") + @classmethod + def list_items(cls, repo: "Repo", *args: Any, **kwargs: Any) -> Any: + """Deprecated, use :class:`IterableObj` instead. -@runtime_checkable -class IterableObj(Protocol): - """Defines an interface for iterable items which is to assure a uniform - way to retrieve and iterate items within the git repository - - Subclasses = [Submodule, Commit, Reference, PushInfo, FetchInfo, Remote]""" + Find (all) items of this type and collect them into a list. - __slots__ = () - _id_attribute_: str + See :meth:`IterableObj.list_items` for details on usage. - @classmethod - def list_items(cls, repo: "Repo", *args: Any, **kwargs: Any) -> IterableList[T_IterableObj]: + :return: + list(Item,...) list of item instances """ - Find all items of this type - subclasses can specify args and kwargs differently. - If no args are given, subclasses are obliged to return all items if no additional - arguments arg given. - - :note: Favor the iter_items method as it will - - :return: list(Item,...) list of item instances""" - out_list: IterableList = IterableList(cls._id_attribute_) + out_list: Any = IterableList(cls._id_attribute_) out_list.extend(cls.iter_items(repo, *args, **kwargs)) return out_list - @classmethod - @abstractmethod - def iter_items(cls, repo: "Repo", *args: Any, **kwargs: Any) -> Iterator[T_IterableObj]: # Iterator[T_IterableObj]: - # return typed to be compatible with subtypes e.g. 
Remote - """For more information about the arguments, see list_items - :return: iterator yielding Items""" - raise NotImplementedError("To be implemented by Subclass") - # } END classes - - -class NullHandler(logging.Handler): - def emit(self, record: object) -> None: - pass diff --git a/init-tests-after-clone.sh b/init-tests-after-clone.sh index 95ced98..bfada01 100755 --- a/init-tests-after-clone.sh +++ b/init-tests-after-clone.sh @@ -1,18 +1,74 @@ -#!/usr/bin/env bash +#!/bin/sh +# +# This file is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ -set -e +set -eu -if [[ -z "$TRAVIS" ]]; then - read -rp "This operation will destroy locally modified files. Continue ? [N/y]: " answer - if [[ ! $answer =~ [yY] ]]; then - exit 2 - fi +fallback_repo_for_tags='https://github.com/gitpython-developers/GitPython.git' + +ci() { + # For now, check just these, as a false positive could lead to data loss. + test -n "${TRAVIS-}" || test -n "${GITHUB_ACTIONS-}" +} + +no_version_tags() { + test -z "$(git tag -l '[0-9]*' 'v[0-9]*')" +} + +warn() { + if test -n "${GITHUB_ACTIONS-}"; then + printf '::warning ::%s\n' "$*" >&2 # Annotate workflow. + else + printf '%s\n' "$@" >&2 + fi +} + +if ! ci; then + printf 'This operation will destroy locally modified files. Continue ? [N/y]: ' >&2 + read -r answer + case "$answer" in + [yY]) + ;; + *) + exit 2 ;; + esac fi +# Stop if we have run this. (You can delete __testing_point__ to let it rerun.) +# This also keeps track of where we are, so we can get back here. git tag __testing_point__ -git checkout master || git checkout -b master + +# The tests need a branch called master. +git checkout master -- || git checkout -b master + +# The tests need a reflog history on the master branch. git reset --hard HEAD~1 git reset --hard HEAD~1 git reset --hard HEAD~1 + +# Point the master branch where we started, so we test the correct code. 
git reset --hard __testing_point__ + +# The tests need submodules, including a submodule with a submodule. git submodule update --init --recursive + +# The tests need some version tags. Try to get them even in forks. This fetches +# other objects too. So, locally, we always do it, for a consistent experience. +if ! ci || no_version_tags; then + git fetch --all --tags +fi + +# If we still have no version tags, try to get them from the original repo. +if no_version_tags; then + warn 'No local or remote version tags found. Trying fallback remote:' \ + "$fallback_repo_for_tags" + + # git fetch supports * but not [], and --no-tags means no *other* tags, so... + printf 'refs/tags/%d*:refs/tags/%d*\n' 0 0 1 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 9 | + xargs git fetch --no-tags "$fallback_repo_for_tags" + + if no_version_tags; then + warn 'No version tags found anywhere. Some tests will fail.' + fi +fi diff --git a/pyproject.toml b/pyproject.toml index fa06458..090972e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,33 +3,32 @@ requires = ["setuptools"] build-backend = "setuptools.build_meta" [tool.pytest.ini_options] -python_files = 'test_*.py' -testpaths = 'test' # space separated list of paths from root e.g test tests doc/testing -addopts = '--cov=git --cov-report=term --maxfail=10 --force-sugar --disable-warnings' -filterwarnings = 'ignore::DeprecationWarning' +addopts = "--cov=git --cov-report=term -ra" +filterwarnings = "ignore::DeprecationWarning" +python_files = "test_*.py" +tmp_path_retention_policy = "failed" +testpaths = "test" # Space separated list of paths from root e.g test tests doc/testing. 
# --cov coverage # --cov-report term # send report to terminal term-missing -> terminal with line numbers html xml # --cov-report term-missing # to terminal with line numbers # --cov-report html:path # html file at path # --maxfail # number of errors before giving up -# -disable-warnings # Disable pytest warnings (not codebase warnings) -# -rf # increased reporting of failures -# -rE # increased reporting of errors +# -rfE # default test summary: list fail and error +# -ra # test summary: list all non-passing (fail, error, skip, xfail, xpass) # --ignore-glob=**/gitdb/* # ignore glob paths # filterwarnings ignore::WarningType # ignores those warnings [tool.mypy] -python_version = "3.7" +python_version = "3.8" +files = ["git/", "test/deprecation/"] disallow_untyped_defs = true no_implicit_optional = true warn_redundant_casts = true -# warn_unused_ignores = true +warn_unused_ignores = true # Useful in general, but especially in test/deprecation. warn_unreachable = true -show_error_codes = true implicit_reexport = true # strict = true - -# TODO: remove when 'gitdb' is fully annotated +# TODO: Remove when 'gitdb' is fully annotated. exclude = ["^git/ext/gitdb"] [[tool.mypy.overrides]] module = "gitdb.*" @@ -42,7 +41,49 @@ source = ["git"] include = ["*/git/*"] omit = ["*/git/ext/*"] -[tool.black] +[tool.ruff] +target-version = "py37" line-length = 120 -target-version = ['py37'] -extend-exclude = "git/ext/gitdb" +# Exclude a variety of commonly ignored directories. +exclude = [ + "git/ext/", + "build", + "dist", +] +# Enable Pyflakes `E` and `F` codes by default. 
+lint.select = [ + "E", + "W", # See: https://pypi.org/project/pycodestyle + "F", # See: https://pypi.org/project/pyflakes + # "I", # See: https://pypi.org/project/isort/ + # "S", # See: https://pypi.org/project/flake8-bandit + # "UP", # See: https://docs.astral.sh/ruff/rules/#pyupgrade-up +] +lint.extend-select = [ + # "A", # See: https://pypi.org/project/flake8-builtins + "B", # See: https://pypi.org/project/flake8-bugbear + "C4", # See: https://pypi.org/project/flake8-comprehensions + "TCH004", # See: https://docs.astral.sh/ruff/rules/runtime-import-in-type-checking-block/ +] +lint.ignore = [ + "E203", # Whitespace before ':' + "E731", # Do not assign a `lambda` expression, use a `def` +] +lint.ignore-init-module-imports = true +lint.unfixable = [ + "F401", # Module imported but unused +] + +[tool.ruff.lint.per-file-ignores] +"test/**" = [ + "B018", # useless-expression +] +"fuzzing/fuzz-targets/**" = [ + "E402", # environment setup must happen before the `git` module is imported, thus cannot happen at top of file +] + + +[tool.codespell] +ignore-words-list="afile,assertIn,doesnt,gud,uptodate" +#count = true +quiet-level = 3 diff --git a/requirements-dev.txt b/requirements-dev.txt index e3030c5..f626644 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,9 +1,8 @@ -r requirements.txt -r test-requirements.txt -# libraries for additional local testing/linting - to be added to test-requirements.txt when all pass - -flake8-type-checking;python_version>="3.8" # checks for TYPE_CHECKING only imports - +# For additional local testing/linting - to be added elsewhere eventually. 
+ruff +shellcheck pytest-icdiff # pytest-profiling diff --git a/setup.py b/setup.py index 90df8d7..f28fedb 100755 --- a/setup.py +++ b/setup.py @@ -1,23 +1,24 @@ #!/usr/bin/env python +import os +from pathlib import Path +import sys from typing import Sequence + from setuptools import setup, find_packages from setuptools.command.build_py import build_py as _build_py from setuptools.command.sdist import sdist as _sdist -import os -import sys -with open(os.path.join(os.path.dirname(__file__), "VERSION")) as ver_file: - VERSION = ver_file.readline().strip() -with open("requirements.txt") as reqs_file: - requirements = reqs_file.read().splitlines() +def _read_content(path: str) -> str: + return (Path(__file__).parent / path).read_text(encoding="utf-8") -with open("test-requirements.txt") as reqs_file: - test_requirements = reqs_file.read().splitlines() -with open("README.md") as rm_file: - long_description = rm_file.read() +version = _read_content("VERSION").strip() +requirements = _read_content("requirements.txt").splitlines() +test_requirements = _read_content("test-requirements.txt").splitlines() +doc_requirements = _read_content("doc/requirements.txt").splitlines() +long_description = _read_content("README.md") class build_py(_build_py): @@ -48,7 +49,7 @@ def _stamp_version(filename: str) -> None: with open(filename) as f: for line in f: if "__version__ =" in line: - line = line.replace('"git"', "'%s'" % VERSION) + line = line.replace('"git"', "'%s'" % version) found = True out.append(line) except OSError: @@ -64,18 +65,21 @@ def _stamp_version(filename: str) -> None: setup( name="GitPython", cmdclass={"build_py": build_py, "sdist": sdist}, - version=VERSION, + version=version, description="GitPython is a Python library used to interact with Git repositories", author="Sebastian Thiel, Michael Trier", author_email="byronimo@gmail.com, mtrier@gmail.com", - license="BSD", + license="BSD-3-Clause", url="https://github.com/gitpython-developers/GitPython", 
packages=find_packages(exclude=["test", "test.*"]), include_package_data=True, package_dir={"git": "git"}, python_requires=">=3.7", install_requires=requirements, - extras_require={"test": test_requirements}, + extras_require={ + "test": test_requirements, + "doc": doc_requirements, + }, zip_safe=False, long_description=long_description, long_description_content_type="text/markdown", diff --git a/test-requirements.txt b/test-requirements.txt index b00dd6f..75e9e81 100644 --- a/test-requirements.txt +++ b/test-requirements.txt @@ -1,8 +1,11 @@ -black coverage[toml] -ddt>=1.1.1, !=1.4.3 +ddt >= 1.1.1, != 1.4.3 +mock ; python_version < "3.8" mypy pre-commit -pytest +pytest >= 7.3.1 pytest-cov +pytest-instafail +pytest-mock pytest-sugar +typing-extensions ; python_version < "3.11" diff --git a/test/__init__.py b/test/__init__.py index a3d5145..fbaebcd 100644 --- a/test/__init__.py +++ b/test/__init__.py @@ -1,5 +1,4 @@ -# __init__.py # Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors # -# This module is part of GitPython and is released under -# the BSD License: https://opensource.org/license/bsd-3-clause/ +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ diff --git a/test/deprecation/__init__.py b/test/deprecation/__init__.py new file mode 100644 index 0000000..fec3126 --- /dev/null +++ b/test/deprecation/__init__.py @@ -0,0 +1,19 @@ +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ + +"""Tests of deprecation warnings and possible related attribute bugs. + +Most deprecation warnings are "basic" in the sense that there is no special complexity +to consider, in introducing them. However, to issue deprecation warnings on mere +attribute access can involve adding new dynamic behavior. This can lead to subtle bugs +or less useful dynamic metadata. 
It can also weaken static typing, as happens if a type +checker sees a method like ``__getattr__`` in a module or class whose attributes it did +not already judge to be dynamic. This test.deprecation submodule covers all three cases: +the basic cases, subtle dynamic behavior, and subtle static type checking issues. + +Static type checking is "tested" by a combination of code that should not be treated as +a type error but would be in the presence of particular bugs, and code that *should* be +treated as a type error and is accordingly marked ``# type: ignore[REASON]`` (for +specific ``REASON``. The latter will only produce mypy errors when the expectation is +not met if it is configured with ``warn_unused_ignores = true``. +""" diff --git a/test/deprecation/lib.py b/test/deprecation/lib.py new file mode 100644 index 0000000..9fe623a --- /dev/null +++ b/test/deprecation/lib.py @@ -0,0 +1,27 @@ +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ + +"""Support library for deprecation tests.""" + +__all__ = ["assert_no_deprecation_warning", "suppress_deprecation_warning"] + +import contextlib +import warnings + +from typing import Generator + + +@contextlib.contextmanager +def assert_no_deprecation_warning() -> Generator[None, None, None]: + """Context manager to assert that code does not issue any deprecation warnings.""" + with warnings.catch_warnings(): + warnings.simplefilter("error", DeprecationWarning) + warnings.simplefilter("error", PendingDeprecationWarning) + yield + + +@contextlib.contextmanager +def suppress_deprecation_warning() -> Generator[None, None, None]: + with warnings.catch_warnings(): + warnings.simplefilter("ignore", DeprecationWarning) + yield diff --git a/test/deprecation/test_basic.py b/test/deprecation/test_basic.py new file mode 100644 index 0000000..3bf0287 --- /dev/null +++ b/test/deprecation/test_basic.py @@ -0,0 +1,137 @@ +# This module is part of GitPython 
and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ + +"""Tests of assorted deprecation warnings when there are no extra subtleties to check. + +This tests deprecation warnings where all that needs be verified is that a deprecated +property, function, or class issues a DeprecationWarning when used and, if applicable, +that recommended alternatives do not issue the warning. + +This is in contrast to other modules within test.deprecation, which test warnings where +there is a risk of breaking other runtime behavior, or of breaking static type checking +or making it less useful, by introducing the warning or in plausible future changes to +how the warning is implemented. That happens when it is necessary to customize attribute +access on a module or class, in a way it was not customized before, to issue a warning. +It is inapplicable to the deprecations whose warnings are tested in this module. +""" + +import pytest + +from git.diff import NULL_TREE +from git.objects.util import Traversable +from git.repo import Repo +from git.util import Iterable as _Iterable, IterableObj + +from .lib import assert_no_deprecation_warning + +# typing ----------------------------------------------------------------- + +from typing import Generator, TYPE_CHECKING + +if TYPE_CHECKING: + from pathlib import Path + + from git.diff import Diff, DiffIndex + from git.objects.commit import Commit + +# ------------------------------------------------------------------------ + + +@pytest.fixture +def commit(tmp_path: "Path") -> Generator["Commit", None, None]: + """Fixture to supply a one-commit repo's commit, enough for deprecation tests.""" + (tmp_path / "a.txt").write_text("hello\n", encoding="utf-8") + repo = Repo.init(tmp_path) + repo.index.add(["a.txt"]) + yield repo.index.commit("Initial commit") + repo.close() + + +@pytest.fixture +def diff(commit: "Commit") -> Generator["Diff", None, None]: + """Fixture to supply a single-file diff.""" + 
(diff,) = commit.diff(NULL_TREE) # Exactly one file in the diff. + yield diff + + +@pytest.fixture +def diffs(commit: "Commit") -> Generator["DiffIndex", None, None]: + """Fixture to supply a DiffIndex.""" + yield commit.diff(NULL_TREE) + + +def test_diff_renamed_warns(diff: "Diff") -> None: + """The deprecated Diff.renamed property issues a deprecation warning.""" + with pytest.deprecated_call(): + diff.renamed + + +def test_diff_renamed_file_does_not_warn(diff: "Diff") -> None: + """The preferred Diff.renamed_file property issues no deprecation warning.""" + with assert_no_deprecation_warning(): + diff.renamed_file + + +def test_commit_trailers_warns(commit: "Commit") -> None: + """The deprecated Commit.trailers property issues a deprecation warning.""" + with pytest.deprecated_call(): + commit.trailers + + +def test_commit_trailers_list_does_not_warn(commit: "Commit") -> None: + """The nondeprecated Commit.trailers_list property issues no deprecation warning.""" + with assert_no_deprecation_warning(): + commit.trailers_list + + +def test_commit_trailers_dict_does_not_warn(commit: "Commit") -> None: + """The nondeprecated Commit.trailers_dict property issues no deprecation warning.""" + with assert_no_deprecation_warning(): + commit.trailers_dict + + +def test_traverse_list_traverse_in_base_class_warns(commit: "Commit") -> None: + """Traversable.list_traverse's base implementation issues a deprecation warning.""" + with pytest.deprecated_call(): + Traversable.list_traverse(commit) + + +def test_traversable_list_traverse_override_does_not_warn(commit: "Commit") -> None: + """Calling list_traverse on concrete subclasses is not deprecated, does not warn.""" + with assert_no_deprecation_warning(): + commit.list_traverse() + + +def test_traverse_traverse_in_base_class_warns(commit: "Commit") -> None: + """Traversable.traverse's base implementation issues a deprecation warning.""" + with pytest.deprecated_call(): + Traversable.traverse(commit) + + +def 
test_traverse_traverse_override_does_not_warn(commit: "Commit") -> None: + """Calling traverse on concrete subclasses is not deprecated, does not warn.""" + with assert_no_deprecation_warning(): + commit.traverse() + + +def test_iterable_inheriting_warns() -> None: + """Subclassing the deprecated git.util.Iterable issues a deprecation warning.""" + with pytest.deprecated_call(): + + class Derived(_Iterable): + pass + + +def test_iterable_obj_inheriting_does_not_warn() -> None: + """Subclassing git.util.IterableObj is not deprecated, does not warn.""" + with assert_no_deprecation_warning(): + + class Derived(IterableObj): + pass + + +def test_diff_iter_change_type(diffs: "DiffIndex") -> None: + """The internal DiffIndex.iter_change_type function issues no deprecation warning.""" + with assert_no_deprecation_warning(): + for change_type in diffs.change_type: + [*diffs.iter_change_type(change_type=change_type)] diff --git a/test/deprecation/test_cmd_git.py b/test/deprecation/test_cmd_git.py new file mode 100644 index 0000000..e444902 --- /dev/null +++ b/test/deprecation/test_cmd_git.py @@ -0,0 +1,391 @@ +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ + +"""Tests for dynamic and static characteristics of Git class and instance attributes. + +Currently this all relates to the deprecated :attr:`Git.USE_SHELL` class attribute, +which can also be accessed through instances. Some tests directly verify its behavior, +including deprecation warnings, while others verify that other aspects of attribute +access are not inadvertently broken by mechanisms introduced to issue the warnings. 
+ +A note on multiprocessing +========================= + +Because USE_SHELL has no instance state, this module does not include tests of pickling +and multiprocessing: + +- Just as with a simple class attribute, when a class attribute with custom logic is set + to another value, even before a worker process is created that uses the class, the + worker process may see either the initial or new value, depending on the process start + method. With "fork", changes are preserved. With "spawn" or "forkserver", re-importing + the modules causes initial values to be set. Then the value in the parent at the time + it dispatches the task is only set in the children if the parent has the task set it, + or if it is set as a side effect of importing needed modules, or of unpickling objects + passed to the child (for example, if it is set in a top-level statement of the module + that defines the function submitted for the child worker process to call). + +- When an attribute gains new logic provided by a property or custom descriptor, and the + attribute involves instance-level state, incomplete or corrupted pickling can break + multiprocessing. (For example, when an instance attribute is reimplemented using a + descriptor that stores data in a global WeakKeyDictionary, pickled instances should be + tested to ensure they are still working correctly.) But nothing like that applies + here, because instance state is not involved. Although the situation is inherently + complex as described above, it is independent of the attribute implementation. + +- That USE_SHELL cannot be set on instances, and that when retrieved on instances it + always gives the same value as on the class, is covered in the tests here. + +A note on metaclass conflicts +============================= + +The most important DeprecationWarning is for code like ``Git.USE_SHELL = True``, which +is a security risk. But this warning may not be possible to implement without a custom +metaclass. 
This is because a descriptor in a class can customize all forms of attribute +access on its instances, but can only customize getting an attribute on the class. +Retrieving a descriptor from a class calls its ``__get__`` method (if defined), but +replacing or deleting it does not call its ``__set__`` or ``__delete__`` methods. + +Adding a metaclass is a potentially breaking change. This is because derived classes +that use an unrelated metaclass, whether directly or by inheriting from a class such as +abc.ABC that uses one, will raise TypeError when defined. These would have to be +modified to use a newly introduced metaclass that is a lower bound of both. Subclasses +remain unbroken in the far more typical case that they use no custom metaclass. + +The tests in this module do not establish whether the danger of setting Git.USE_SHELL to +True is high enough, and applications of deriving from Git and using an unrelated custom +metaclass marginal enough, to justify introducing a metaclass to issue the warnings. +""" + +import logging +import sys +from typing import Generator +import unittest.mock + +if sys.version_info >= (3, 11): + from typing import assert_type +else: + from typing_extensions import assert_type + +import pytest +from pytest import WarningsRecorder + +from git.cmd import Git, GitMeta + +from .lib import assert_no_deprecation_warning, suppress_deprecation_warning + +_USE_SHELL_DEPRECATED_FRAGMENT = "Git.USE_SHELL is deprecated" +"""Text contained in all USE_SHELL deprecation warnings, and starting most of them.""" + +_USE_SHELL_DANGEROUS_FRAGMENT = "Setting Git.USE_SHELL to True is unsafe and insecure" +"""Beginning text of USE_SHELL deprecation warnings when USE_SHELL is set True.""" + +_logger = logging.getLogger(__name__) + + +@pytest.fixture +def restore_use_shell_state() -> Generator[None, None, None]: + """Fixture to attempt to restore state associated with the USE_SHELL attribute. 
+ + This is used to decrease the likelihood of state changes leaking out and affecting + other tests. But the goal is not to assert implementation details of USE_SHELL. + + This covers two of the common implementation strategies, for convenience in testing + both. USE_SHELL could be implemented in the metaclass: + + * With a separate _USE_SHELL backing attribute. If using a property or other + descriptor, this is the natural way to do it, but custom __getattribute__ and + __setattr__ logic, if it does more than adding warnings, may also use that. + * Like a simple attribute, using USE_SHELL itself, stored as usual in the class + dictionary, with custom __getattribute__/__setattr__ logic only to warn. + + This tries to save private state, tries to save the public attribute value, yields + to the test case, tries to restore the public attribute value, then tries to restore + private state. The idea is that if the getting or setting logic is wrong in the code + under test, the state will still most likely be reset successfully. + """ + no_value = object() + + # Try to save the original private state. + try: + old_private_value = Git._USE_SHELL # type: ignore[attr-defined] + except AttributeError: + separate_backing_attribute = False + try: + old_private_value = type.__getattribute__(Git, "USE_SHELL") + except AttributeError: + old_private_value = no_value + _logger.error("Cannot retrieve old private _USE_SHELL or USE_SHELL value") + else: + separate_backing_attribute = True + + try: + # Try to save the original public value. Rather than attempt to restore a state + # where the attribute is not set, if we cannot do this we allow AttributeError + # to propagate out of the fixture, erroring the test case before its code runs. + with suppress_deprecation_warning(): + old_public_value = Git.USE_SHELL + + # This doesn't have its own try-finally because pytest catches exceptions raised + # during the yield. (The outer try-finally catches exceptions in this fixture.) 
+ yield + + # Try to restore the original public value. + with suppress_deprecation_warning(): + Git.USE_SHELL = old_public_value + finally: + # Try to restore the original private state. + if separate_backing_attribute: + Git._USE_SHELL = old_private_value # type: ignore[attr-defined] + elif old_private_value is not no_value: + type.__setattr__(Git, "USE_SHELL", old_private_value) + + +def test_cannot_access_undefined_on_git_class() -> None: + """Accessing a bogus attribute on the Git class remains a dynamic and static error. + + This differs from Git instances, where most attribute names will dynamically + synthesize a "bound method" that runs a git subcommand when called. + """ + with pytest.raises(AttributeError): + Git.foo # type: ignore[attr-defined] + + +def test_get_use_shell_on_class_default() -> None: + """USE_SHELL can be read as a class attribute, defaulting to False and warning.""" + with pytest.deprecated_call() as ctx: + use_shell = Git.USE_SHELL + + (message,) = [str(entry.message) for entry in ctx] # Exactly one warning. + assert message.startswith(_USE_SHELL_DEPRECATED_FRAGMENT) + + assert_type(use_shell, bool) + + # This comes after the static assertion, just in case it would affect the inference. + assert not use_shell + + +def test_get_use_shell_on_instance_default() -> None: + """USE_SHELL can be read as an instance attribute, defaulting to False and warning. + + This is the same as test_get_use_shell_on_class_default above, but for instances. + The test is repeated, instead of using parametrization, for clearer static analysis. + """ + instance = Git() + + with pytest.deprecated_call() as ctx: + use_shell = instance.USE_SHELL + + (message,) = [str(entry.message) for entry in ctx] # Exactly one warning. + assert message.startswith(_USE_SHELL_DEPRECATED_FRAGMENT) + + assert_type(use_shell, bool) + + # This comes after the static assertion, just in case it would affect the inference. 
+ assert not use_shell + + +def _assert_use_shell_full_results( + set_value: bool, + reset_value: bool, + setting: WarningsRecorder, + checking: WarningsRecorder, + resetting: WarningsRecorder, + rechecking: WarningsRecorder, +) -> None: + # The attribute should take on the values set to it. + assert set_value is True + assert reset_value is False + + # Each access should warn exactly once. + (set_message,) = [str(entry.message) for entry in setting] + (check_message,) = [str(entry.message) for entry in checking] + (reset_message,) = [str(entry.message) for entry in resetting] + (recheck_message,) = [str(entry.message) for entry in rechecking] + + # Setting it to True should produce the special warning for that. + assert _USE_SHELL_DEPRECATED_FRAGMENT in set_message + assert set_message.startswith(_USE_SHELL_DANGEROUS_FRAGMENT) + + # All other operations should produce a usual warning. + assert check_message.startswith(_USE_SHELL_DEPRECATED_FRAGMENT) + assert reset_message.startswith(_USE_SHELL_DEPRECATED_FRAGMENT) + assert recheck_message.startswith(_USE_SHELL_DEPRECATED_FRAGMENT) + + +def test_use_shell_set_and_get_on_class(restore_use_shell_state: None) -> None: + """USE_SHELL can be set and re-read as a class attribute, always warning.""" + with pytest.deprecated_call() as setting: + Git.USE_SHELL = True + with pytest.deprecated_call() as checking: + set_value = Git.USE_SHELL + with pytest.deprecated_call() as resetting: + Git.USE_SHELL = False + with pytest.deprecated_call() as rechecking: + reset_value = Git.USE_SHELL + + _assert_use_shell_full_results( + set_value, + reset_value, + setting, + checking, + resetting, + rechecking, + ) + + +def test_use_shell_set_on_class_get_on_instance(restore_use_shell_state: None) -> None: + """USE_SHELL can be set on the class and read on an instance, always warning. + + This is like test_use_shell_set_and_get_on_class but it performs reads on an + instance. 
There is some redundancy here in assertions about warnings when the + attribute is set, but it is a separate test so that any bugs where a read on the + class (or an instance) is needed first before a read on an instance (or the class) + are detected. + """ + instance = Git() + + with pytest.deprecated_call() as setting: + Git.USE_SHELL = True + with pytest.deprecated_call() as checking: + set_value = instance.USE_SHELL + with pytest.deprecated_call() as resetting: + Git.USE_SHELL = False + with pytest.deprecated_call() as rechecking: + reset_value = instance.USE_SHELL + + _assert_use_shell_full_results( + set_value, + reset_value, + setting, + checking, + resetting, + rechecking, + ) + + +@pytest.mark.parametrize("value", [False, True]) +def test_use_shell_cannot_set_on_instance( + value: bool, + restore_use_shell_state: None, # In case of a bug where it does set USE_SHELL. +) -> None: + instance = Git() + with pytest.raises(AttributeError): + instance.USE_SHELL = value # type: ignore[misc] # Name not in __slots__. + + +@pytest.mark.filterwarnings("ignore::DeprecationWarning") +@pytest.mark.parametrize("original_value", [False, True]) +def test_use_shell_is_mock_patchable_on_class_as_object_attribute( + original_value: bool, + restore_use_shell_state: None, +) -> None: + """Asymmetric patching looking up USE_SHELL in ``__dict__`` doesn't corrupt state. + + Code using GitPython may temporarily set Git.USE_SHELL to a different value. Ideally + it does not use unittest.mock.patch to do so, because that makes subtle assumptions + about the relationship between attributes and dictionaries. If the attribute can be + retrieved from the ``__dict__`` rather than directly, that value is assumed the + correct one to restore, even by a normal setattr. 
+ + The effect is that some ways of simulating a class attribute with added behavior can + cause a descriptor, such as a property, to be set as the value of its own backing + attribute during unpatching; then subsequent reads raise RecursionError. This + happens if both (a) setting it on the class is customized in a metaclass and (b) + getting it on instances is customized with a descriptor (such as a property) in the + class itself. + + Although ideally code outside GitPython would not rely on being able to patch + Git.USE_SHELL with unittest.mock.patch, the technique is widespread. Thus, USE_SHELL + should be implemented in some way compatible with it. This test checks for that. + """ + Git.USE_SHELL = original_value + if Git.USE_SHELL is not original_value: + raise RuntimeError("Can't set up the test") + new_value = not original_value + + with unittest.mock.patch.object(Git, "USE_SHELL", new_value): + assert Git.USE_SHELL is new_value + + assert Git.USE_SHELL is original_value + + +def test_execute_without_shell_arg_does_not_warn() -> None: + """No deprecation warning is issued from operations implemented using Git.execute(). + + When no ``shell`` argument is passed to Git.execute, which is when the value of + USE_SHELL is to be used, the way Git.execute itself accesses USE_SHELL does not + issue a deprecation warning. + """ + with assert_no_deprecation_warning(): + Git().version() + + +_EXPECTED_DIR_SUBSET = { + "cat_file_all", + "cat_file_header", + "GIT_PYTHON_TRACE", + "USE_SHELL", # The attribute we get deprecation warnings for. 
+ "GIT_PYTHON_GIT_EXECUTABLE", + "refresh", + "is_cygwin", + "polish_url", + "check_unsafe_protocols", + "check_unsafe_options", + "AutoInterrupt", + "CatFileContentStream", + "__init__", + "__getattr__", + "set_persistent_git_options", + "working_dir", + "version_info", + "execute", + "environment", + "update_environment", + "custom_environment", + "transform_kwarg", + "transform_kwargs", + "__call__", + "_call_process", # Not currently considered public, but unlikely to change. + "get_object_header", + "get_object_data", + "stream_object_data", + "clear_cache", +} +"""Some stable attributes dir() should include on the Git class and its instances. + +This is intentionally incomplete, but includes substantial variety. Most importantly, it +includes both ``USE_SHELL`` and a wide sampling of other attributes. +""" + + +def test_class_dir() -> None: + """dir() on the Git class includes its statically known attributes. + + This tests that the mechanism that adds dynamic behavior to USE_SHELL accesses so + that all accesses issue warnings does not break dir() for the class, neither for + USE_SHELL nor for ordinary (non-deprecated) attributes. + """ + actual = set(dir(Git)) + assert _EXPECTED_DIR_SUBSET <= actual + + +def test_instance_dir() -> None: + """dir() on Git objects includes its statically known attributes. + + This is like test_class_dir, but for Git instances rather than the class itself. + """ + instance = Git() + actual = set(dir(instance)) + assert _EXPECTED_DIR_SUBSET <= actual + + +def test_metaclass_alias() -> None: + """GitMeta aliases Git's metaclass, whether that is type or a custom metaclass.""" + + def accept_metaclass_instance(cls: GitMeta) -> None: + """Check that cls is statically recognizable as an instance of GitMeta.""" + + accept_metaclass_instance(Git) # assert_type would expect Type[Git], not GitMeta. + + # This comes after the static check, just in case it would affect the inference. 
+ assert type(Git) is GitMeta diff --git a/test/deprecation/test_compat.py b/test/deprecation/test_compat.py new file mode 100644 index 0000000..2d7805e --- /dev/null +++ b/test/deprecation/test_compat.py @@ -0,0 +1,84 @@ +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ + +"""Tests for dynamic and static characteristics of git.compat module attributes. + +These tests verify that the is_<platform> attributes are available, and are even listed +in the output of dir(), but issue warnings, and that bogus (misspelled or unrecognized) +attribute access is still an error both at runtime and with mypy. This is similar to +some of the tests in test_toplevel, but the situation being tested here is simpler +because it does not involve unintuitive module aliasing or import behavior. So this only +tests attribute access, not "from" imports (whose behavior can be intuitively inferred). +""" + +import os +import sys + +if sys.version_info >= (3, 11): + from typing import assert_type +else: + from typing_extensions import assert_type + +import pytest + +import git.compat + +_MESSAGE_LEADER = "{} and other is_<platform> aliases are deprecated." 
+"""Form taken by the beginning of the warnings issued for is_<platform> access.""" + + +def test_cannot_access_undefined() -> None: + """Accessing a bogus attribute in git.compat remains a dynamic and static error.""" + with pytest.raises(AttributeError): + git.compat.foo # type: ignore[attr-defined] + + +def test_is_platform() -> None: + """The is_<platform> attributes work, warn, and mypy accepts code accessing them.""" + fully_qualified_names = [ + "git.compat.is_win", + "git.compat.is_posix", + "git.compat.is_darwin", + ] + + with pytest.deprecated_call() as ctx: + is_win = git.compat.is_win + is_posix = git.compat.is_posix + is_darwin = git.compat.is_darwin + + assert_type(is_win, bool) + assert_type(is_posix, bool) + assert_type(is_darwin, bool) + + messages = [str(entry.message) for entry in ctx] + assert len(messages) == 3 + + for fullname, message in zip(fully_qualified_names, messages): + assert message.startswith(_MESSAGE_LEADER.format(fullname)) + + # These assertions exactly reproduce the expressions in the code under test, so they + # are not good for testing that the values are correct. Instead, their purpose is to + # ensure that any dynamic machinery put in place in git.compat to cause warnings to + # be issued does not get in the way of the intended values being accessed. + assert is_win == (os.name == "nt") + assert is_posix == (os.name == "posix") + assert is_darwin == (sys.platform == "darwin") + + +def test_dir() -> None: + """dir() on git.compat includes all public attributes, even if deprecated. + + As dir() usually does, it also has nonpublic attributes, which should also not be + removed by a custom __dir__ function, but those are less important to test. 
+ """ + expected_subset = { + "is_win", + "is_posix", + "is_darwin", + "defenc", + "safe_decode", + "safe_encode", + "win_encode", + } + actual = set(dir(git.compat)) + assert expected_subset <= actual diff --git a/test/deprecation/test_toplevel.py b/test/deprecation/test_toplevel.py new file mode 100644 index 0000000..7404081 --- /dev/null +++ b/test/deprecation/test_toplevel.py @@ -0,0 +1,233 @@ +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ + +"""Tests for dynamic and static characteristics of top-level git module attributes. + +Provided mypy has ``warn_unused_ignores = true`` set, running mypy on these test cases +checks static typing of the code under test. This is the reason for the many separate +single-line attr-defined suppressions, so those should not be replaced with a smaller +number of more broadly scoped suppressions, even where it is feasible to do so. + +Running pytest checks dynamic behavior as usual. 
+""" + +import itertools +import sys +from typing import Type + +if sys.version_info >= (3, 11): + from typing import assert_type +else: + from typing_extensions import assert_type + +import pytest + +import git +import git.index.base +import git.index.fun +import git.index.typ +import git.refs.head +import git.refs.log +import git.refs.reference +import git.refs.symbolic +import git.refs.tag + + +def test_cannot_access_undefined() -> None: + """Accessing a bogus attribute in git remains a dynamic and static error.""" + with pytest.raises(AttributeError): + git.foo # type: ignore[attr-defined] + + +def test_cannot_import_undefined() -> None: + """Importing a bogus attribute from git remains a dynamic and static error.""" + with pytest.raises(ImportError): + from git import foo # type: ignore[attr-defined] # noqa: F401 + + +def test_util_alias_access() -> None: + """Accessing util in git works, warns, and mypy verifies it and its attributes.""" + # The attribute access should succeed. + with pytest.deprecated_call() as ctx: + util = git.util + + # There should be exactly one warning and it should have our util-specific message. + (message,) = [str(entry.message) for entry in ctx] + assert "git.util" in message + assert "git.index.util" in message + assert "should not be relied on" in message + + # We check access through the util alias to the TemporaryFileSwap member, since it + # is slightly simpler to validate and reason about than the other public members, + # which are functions (specifically, higher-order functions for use as decorators). + from git.index.util import TemporaryFileSwap + + assert_type(util.TemporaryFileSwap, Type[TemporaryFileSwap]) + + # This comes after the static assertion, just in case it would affect the inference. + assert util.TemporaryFileSwap is TemporaryFileSwap + + +def test_util_alias_import() -> None: + """Importing util from git works, warns, and mypy verifies it and its attributes.""" + # The import should succeed. 
+ with pytest.deprecated_call() as ctx: + from git import util + + # There may be multiple warnings. In CPython there will be currently always be + # exactly two, possibly due to the equivalent of calling hasattr to do a pre-check + # prior to retrieving the attribute for actual use. However, all warnings should + # have the same message, and it should be our util-specific message. + (message,) = {str(entry.message) for entry in ctx} + assert "git.util" in message, "Has alias." + assert "git.index.util" in message, "Has target." + assert "should not be relied on" in message, "Distinct from other messages." + + # As above, we check access through the util alias to the TemporaryFileSwap member. + from git.index.util import TemporaryFileSwap + + assert_type(util.TemporaryFileSwap, Type[TemporaryFileSwap]) + + # This comes after the static assertion, just in case it would affect the inference. + assert util.TemporaryFileSwap is TemporaryFileSwap + + +_PRIVATE_MODULE_ALIAS_TARGETS = ( + git.refs.head, + git.refs.log, + git.refs.reference, + git.refs.symbolic, + git.refs.tag, + git.index.base, + git.index.fun, + git.index.typ, +) +"""Targets of private aliases in the git module to some modules, not including util.""" + + +_PRIVATE_MODULE_ALIAS_TARGET_NAMES = ( + "git.refs.head", + "git.refs.log", + "git.refs.reference", + "git.refs.symbolic", + "git.refs.tag", + "git.index.base", + "git.index.fun", + "git.index.typ", +) +"""Expected ``__name__`` attributes of targets of private aliases in the git module.""" + + +def test_alias_target_module_names_are_by_location() -> None: + """The aliases are weird, but their targets are normal, even in ``__name__``.""" + actual = [module.__name__ for module in _PRIVATE_MODULE_ALIAS_TARGETS] + expected = list(_PRIVATE_MODULE_ALIAS_TARGET_NAMES) + assert actual == expected + + +def test_private_module_alias_access() -> None: + """Non-util private alias access works but warns and is a deliberate mypy error.""" + with 
pytest.deprecated_call() as ctx: + assert ( + git.head, # type: ignore[attr-defined] + git.log, # type: ignore[attr-defined] + git.reference, # type: ignore[attr-defined] + git.symbolic, # type: ignore[attr-defined] + git.tag, # type: ignore[attr-defined] + git.base, # type: ignore[attr-defined] + git.fun, # type: ignore[attr-defined] + git.typ, # type: ignore[attr-defined] + ) == _PRIVATE_MODULE_ALIAS_TARGETS + + # Each should have warned exactly once, and note what to use instead. + messages = [str(w.message) for w in ctx] + + assert len(messages) == len(_PRIVATE_MODULE_ALIAS_TARGETS) + + for fullname, message in zip(_PRIVATE_MODULE_ALIAS_TARGET_NAMES, messages): + assert message.endswith(f"Use {fullname} instead.") + + +def test_private_module_alias_import() -> None: + """Non-util private alias import works but warns and is a deliberate mypy error.""" + with pytest.deprecated_call() as ctx: + from git import head # type: ignore[attr-defined] + from git import log # type: ignore[attr-defined] + from git import reference # type: ignore[attr-defined] + from git import symbolic # type: ignore[attr-defined] + from git import tag # type: ignore[attr-defined] + from git import base # type: ignore[attr-defined] + from git import fun # type: ignore[attr-defined] + from git import typ # type: ignore[attr-defined] + + assert ( + head, + log, + reference, + symbolic, + tag, + base, + fun, + typ, + ) == _PRIVATE_MODULE_ALIAS_TARGETS + + # Each import may warn multiple times. In CPython there will be currently always be + # exactly two warnings per import, possibly due to the equivalent of calling hasattr + # to do a pre-check prior to retrieving the attribute for actual use. However, for + # each import, all messages should be the same and should note what to use instead. 
+ messages_with_duplicates = [str(w.message) for w in ctx] + messages = [message for message, _ in itertools.groupby(messages_with_duplicates)] + + assert len(messages) == len(_PRIVATE_MODULE_ALIAS_TARGETS) + + for fullname, message in zip(_PRIVATE_MODULE_ALIAS_TARGET_NAMES, messages): + assert message.endswith(f"Use {fullname} instead.") + + +def test_dir_contains_public_attributes() -> None: + """All public attributes of the git module are present when dir() is called on it. + + This is naturally the case, but some ways of adding dynamic attribute access + behavior can change it, especially if __dir__ is defined but care is not taken to + preserve the contents that should already be present. + + Note that dir() should usually automatically list non-public attributes if they are + actually "physically" present as well, so the approach taken here to test it should + not be reproduced if __dir__ is added (instead, a call to globals() could be used, + as its keys list the automatic values). + """ + expected_subset = set(git.__all__) + actual = set(dir(git)) + assert expected_subset <= actual + + +def test_dir_does_not_contain_util() -> None: + """The util attribute is absent from the dir() of git. + + Because this behavior is less confusing than including it, where its meaning would + be assumed by users examining the dir() for what is available. + """ + assert "util" not in dir(git) + + +def test_dir_does_not_contain_private_module_aliases() -> None: + """Names from inside index and refs only pretend to be there and are not in dir(). + + The reason for omitting these is not that they are private, since private members + are usually included in dir() when actually present. Instead, these are only sort + of even there, no longer being imported and only being resolved dynamically for the + time being. In addition, it would be confusing to list these because doing so would + obscure the module structure of GitPython. 
+ """ + expected_absent = { + "head", + "log", + "reference", + "symbolic", + "tag", + "base", + "fun", + "typ", + } + actual = set(dir(git)) + assert not (expected_absent & actual), "They should be completely disjoint." diff --git a/test/deprecation/test_types.py b/test/deprecation/test_types.py new file mode 100644 index 0000000..f97375a --- /dev/null +++ b/test/deprecation/test_types.py @@ -0,0 +1,69 @@ +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ + +"""Tests for dynamic and static characteristics of git.types module attributes.""" + +import sys + +if sys.version_info >= (3, 8): + from typing import Literal +else: + from typing_extensions import Literal + +import pytest + +import git.types + + +def test_cannot_access_undefined() -> None: + """Accessing a bogus attribute in git.types remains a dynamic and static error.""" + with pytest.raises(AttributeError): + git.types.foo # type: ignore[attr-defined] + + +def test_can_access_lit_commit_ish_but_it_is_not_usable() -> None: + """Lit_commit_ish_can be accessed, but warns and is an invalid type annotation.""" + # It would be fine to test attribute access rather than a "from" import. But a + # "from" import is more likely to appear in actual usage, so it is used here. + with pytest.deprecated_call() as ctx: + from git.types import Lit_commit_ish + + # As noted in test_toplevel.test_util_alias_import, there may be multiple warnings, + # but all with the same message. + (message,) = {str(entry.message) for entry in ctx} + assert "Lit_commit_ish is deprecated." in message + assert 'Literal["commit", "tag", "blob", "tree"]' in message, "Has old definition." + assert 'Literal["commit", "tag"]' in message, "Has new definition." + assert "GitObjectTypeString" in message, "Has new type name for old definition." 
+ + _: Lit_commit_ish = "commit" # type: ignore[valid-type] + + # It should be as documented (even though deliberately unusable in static checks). + assert Lit_commit_ish == Literal["commit", "tag"] + + +def test_dir() -> None: + """dir() on git.types includes public names, even ``Lit_commit_ish``. + + It also contains private names that we don't test. See test_compat.test_dir. + """ + expected_subset = { + "PathLike", + "TBD", + "AnyGitObject", + "Tree_ish", + "Commit_ish", + "GitObjectTypeString", + "Lit_commit_ish", + "Lit_config_levels", + "ConfigLevels_Tup", + "CallableProgress", + "assert_never", + "Files_TD", + "Total_TD", + "HSH_TD", + "Has_Repo", + "Has_id_attribute", + } + actual = set(dir(git.types)) + assert expected_subset <= actual diff --git a/test/fixtures/diff_numstat b/test/fixtures/diff_numstat index 44c6ca2..b76e467 100644 --- a/test/fixtures/diff_numstat +++ b/test/fixtures/diff_numstat @@ -1,2 +1,3 @@ -29 18 a.txt -0 5 b.txt +M 29 18 a.txt +M 0 5 b.txt +A 7 0 c.txt \ No newline at end of file diff --git a/test/fixtures/env_case.py b/test/fixtures/env_case.py index fe85ac4..03b4df2 100644 --- a/test/fixtures/env_case.py +++ b/test/fixtures/env_case.py @@ -1,4 +1,4 @@ -# Steps 3 and 4 for test_it_avoids_upcasing_unrelated_environment_variable_names. +"""Steps 3 and 4 for test_it_avoids_upcasing_unrelated_environment_variable_names.""" import subprocess import sys diff --git a/test/fixtures/polyglot b/test/fixtures/polyglot new file mode 100755 index 0000000..f1dd56b --- /dev/null +++ b/test/fixtures/polyglot @@ -0,0 +1,8 @@ +#!/usr/bin/env sh +# Valid script in both Bash and Python, but with different behavior. +""":" +echo 'Ran intended hook.' 
>output.txt +exit +" """ +from pathlib import Path +Path('payload.txt').write_text('Ran impostor hook!', encoding='utf-8') diff --git a/test/lib/__init__.py b/test/lib/__init__.py index 299317c..f96072c 100644 --- a/test/lib/__init__.py +++ b/test/lib/__init__.py @@ -1,11 +1,10 @@ -# __init__.py # Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors # -# This module is part of GitPython and is released under -# the BSD License: https://opensource.org/license/bsd-3-clause/ +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ -# flake8: noqa import inspect -from .helper import * + +from .helper import * # noqa: F401 F403 __all__ = [name for name, obj in locals().items() if not (name.startswith("_") or inspect.ismodule(obj))] diff --git a/test/lib/helper.py b/test/lib/helper.py index e8464b7..5d91447 100644 --- a/test/lib/helper.py +++ b/test/lib/helper.py @@ -1,25 +1,42 @@ -# helper.py # Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors # -# This module is part of GitPython and is released under -# the BSD License: https://opensource.org/license/bsd-3-clause/ +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ + +__all__ = [ + "fixture_path", + "fixture", + "StringProcessAdapter", + "with_rw_directory", + "with_rw_repo", + "with_rw_and_rw_remote_repo", + "TestBase", + "VirtualEnvironment", + "TestCase", + "SkipTest", + "skipIf", + "GIT_REPO", + "GIT_DAEMON_PORT", +] + import contextlib from functools import wraps import gc import io import logging import os +import os.path as osp +import subprocess +import sys import tempfile import textwrap import time import unittest +import venv -from git.compat import is_win -from git.util import rmtree, cwd import gitdb -import os.path as osp - +from git.util import rmtree, cwd TestCase = unittest.TestCase SkipTest = 
unittest.SkipTest @@ -30,22 +47,7 @@ ospd = osp.dirname GIT_REPO = os.environ.get("GIT_PYTHON_TEST_GIT_REPO_BASE", ospd(ospd(ospd(__file__)))) GIT_DAEMON_PORT = os.environ.get("GIT_PYTHON_TEST_GIT_DAEMON_PORT", "19418") -__all__ = ( - "fixture_path", - "fixture", - "StringProcessAdapter", - "with_rw_directory", - "with_rw_repo", - "with_rw_and_rw_remote_repo", - "TestBase", - "TestCase", - "SkipTest", - "skipIf", - "GIT_REPO", - "GIT_DAEMON_PORT", -) - -log = logging.getLogger(__name__) +_logger = logging.getLogger(__name__) # { Routines @@ -64,10 +66,11 @@ def fixture(name): # { Adapters -class StringProcessAdapter(object): +class StringProcessAdapter: + """Allows strings to be used as process objects returned by subprocess.Popen. - """Allows to use strings as Process object as returned by SubProcess.Popen. - Its tailored to work with the test system only""" + This is tailored to work with the test system only. + """ def __init__(self, input_string): self.stdout = io.BytesIO(input_string) @@ -86,18 +89,18 @@ class StringProcessAdapter(object): def with_rw_directory(func): """Create a temporary directory which can be written to, remove it if the - test succeeds, but leave it otherwise to aid additional debugging""" + test succeeds, but leave it otherwise to aid additional debugging.""" @wraps(func) - def wrapper(self): - path = tempfile.mktemp(prefix=func.__name__) - os.mkdir(path) + def wrapper(self, *args, **kwargs): + path = tempfile.mkdtemp(prefix=func.__name__) keep = False try: - return func(self, path) + return func(self, path, *args, **kwargs) except Exception: - log.info( - "Test %s.%s failed, output is at %r\n", + _logger.info( + "%s %s.%s failed, output is at %r\n", + "Test" if func.__name__.startswith("test_") else "Helper", type(self).__name__, func.__name__, path, @@ -106,7 +109,7 @@ def with_rw_directory(func): raise finally: # Need to collect here to be sure all handles have been closed. It appears - # a windows-only issue. 
In fact things should be deleted, as well as + # a Windows-only issue. In fact things should be deleted, as well as # memory maps closed, once objects go out of scope. For some reason # though this is not the case here unless we collect explicitly. gc.collect() @@ -117,8 +120,7 @@ def with_rw_directory(func): def with_rw_repo(working_tree_ref, bare=False): - """ - Same as with_bare_repo, but clones the rorepo as non-bare repository, checking + """Same as with_bare_repo, but clones the rorepo as non-bare repository, checking out the working tree at the given working_tree_ref. This repository type is more costly due to the working copy checkout. @@ -147,8 +149,8 @@ def with_rw_repo(working_tree_ref, bare=False): os.chdir(rw_repo.working_dir) try: return func(self, rw_repo) - except: # noqa E722 - log.info("Keeping repo after failure: %s", repo_dir) + except: # noqa: E722 B001 + _logger.info("Keeping repo after failure: %s", repo_dir) repo_dir = None raise finally: @@ -176,15 +178,13 @@ def git_daemon_launched(base_path, ip, port): gd = None try: - if is_win: - ## On MINGW-git, daemon exists in .\Git\mingw64\libexec\git-core\, - # but if invoked as 'git daemon', it detaches from parent `git` cmd, - # and then CANNOT DIE! - # So, invoke it as a single command. - ## Cygwin-git has no daemon. But it can use MINGW's. - # + if sys.platform == "win32": + # On MINGW-git, daemon exists in Git\mingw64\libexec\git-core\, + # but if invoked as 'git daemon', it detaches from parent `git` cmd, + # and then CANNOT DIE! + # So, invoke it as a single command. daemon_cmd = [ - "git-daemon", + osp.join(Git()._call_process("--exec-path"), "git-daemon"), "--enable=receive-pack", "--listen=%s" % ip, "--port=%s" % port, @@ -201,8 +201,8 @@ def git_daemon_launched(base_path, ip, port): base_path=base_path, as_process=True, ) - # yes, I know ... fortunately, this is always going to work if sleep time is just large enough - time.sleep(0.5 * (1 + is_win)) + # Yes, I know... 
fortunately, this is always going to work if sleep time is just large enough. + time.sleep(1.0 if sys.platform == "win32" else 0.5) except Exception as ex: msg = textwrap.dedent( """ @@ -215,16 +215,7 @@ def git_daemon_launched(base_path, ip, port): and setting the environment variable GIT_PYTHON_TEST_GIT_DAEMON_PORT to <port> """ ) - if is_win: - msg += textwrap.dedent( - r""" - - On Windows, - the `git-daemon.exe` must be in PATH. - For MINGW, look into .\Git\mingw64\libexec\git-core\), but problems with paths might appear. - CYGWIN has no daemon, but if one exists, it gets along fine (but has also paths problems).""" - ) - log.warning(msg, ex, ip, port, base_path, base_path, exc_info=1) + _logger.warning(msg, ex, ip, port, base_path, base_path, exc_info=1) yield # OK, assume daemon started manually. @@ -233,36 +224,39 @@ def git_daemon_launched(base_path, ip, port): finally: if gd: try: - log.debug("Killing git-daemon...") + _logger.debug("Killing git-daemon...") gd.proc.kill() except Exception as ex: - ## Either it has died (and we're here), or it won't die, again here... - log.debug("Hidden error while Killing git-daemon: %s", ex, exc_info=1) + # Either it has died (and we're here), or it won't die, again here... + _logger.debug("Hidden error while Killing git-daemon: %s", ex, exc_info=1) def with_rw_and_rw_remote_repo(working_tree_ref): - """ - Same as with_rw_repo, but also provides a writable remote repository from which the - rw_repo has been forked as well as a handle for a git-daemon that may be started to - run the remote_repo. - The remote repository was cloned as bare repository from the ro repo, whereas - the rw repo has a working tree and was cloned from the remote repository. + """Same as with_rw_repo, but also provides a writable remote repository from which + the rw_repo has been forked as well as a handle for a git-daemon that may be started + to run the remote_repo. - remote_repo has two remotes: origin and daemon_origin. 
One uses a local url, - the other uses a server url. The daemon setup must be done on system level - and should be an inetd service that serves tempdir.gettempdir() and all - directories in it. + The remote repository was cloned as bare repository from the ro repo, whereas the rw + repo has a working tree and was cloned from the remote repository. + + remote_repo has two remotes: origin and daemon_origin. One uses a local url, the + other uses a server url. The daemon setup must be done on system level and should be + an inetd service that serves tempdir.gettempdir() and all directories in it. The following sketch demonstrates this:: - rorepo ---<bare clone>---> rw_remote_repo ---<clone>---> rw_repo + + rorepo ---<bare clone>---> rw_remote_repo ---<clone>---> rw_repo The test case needs to support the following signature:: + def case(self, rw_repo, rw_daemon_repo) This setup allows you to test push and pull scenarios and hooks nicely. - See working dir info in with_rw_repo - :note: We attempt to launch our own invocation of git-daemon, which will be shutdown at the end of the test. + See working dir info in :func:`with_rw_repo`. + + :note: We attempt to launch our own invocation of git-daemon, which will be shut + down at the end of the test. """ from git import Git, Remote # To avoid circular deps. @@ -275,16 +269,16 @@ def with_rw_and_rw_remote_repo(working_tree_ref): rw_repo_dir = tempfile.mktemp(prefix="daemon_cloned_repo-%s-" % func.__name__) rw_daemon_repo = self.rorepo.clone(rw_daemon_repo_dir, shared=True, bare=True) - # recursive alternates info ? + # Recursive alternates info? rw_repo = rw_daemon_repo.clone(rw_repo_dir, shared=True, bare=False, n=True) try: rw_repo.head.commit = working_tree_ref rw_repo.head.reference.checkout() - # prepare for git-daemon + # Prepare for git-daemon. rw_daemon_repo.daemon_export = True - # this thing is just annoying ! + # This thing is just annoying! 
with rw_daemon_repo.config_writer() as crw: section = "daemon" try: @@ -305,7 +299,7 @@ def with_rw_and_rw_remote_repo(working_tree_ref): cw.set("url", remote_repo_url) with git_daemon_launched( - Git.polish_url(base_daemon_path, is_cygwin=False), # No daemon in Cygwin. + Git.polish_url(base_daemon_path), "127.0.0.1", GIT_DAEMON_PORT, ): @@ -315,8 +309,8 @@ def with_rw_and_rw_remote_repo(working_tree_ref): with cwd(rw_repo.working_dir): try: return func(self, rw_repo, rw_daemon_repo) - except: # noqa E722 - log.info( + except: # noqa: E722 B001 + _logger.info( "Keeping repos after failure: \n rw_repo_dir: %s \n rw_daemon_repo_dir: %s", rw_repo_dir, rw_daemon_repo_dir, @@ -351,35 +345,36 @@ def with_rw_and_rw_remote_repo(working_tree_ref): class TestBase(TestCase): - - """ - Base Class providing default functionality to all tests such as: + """Base class providing default functionality to all tests such as: - Utility functions provided by the TestCase base of the unittest method such as:: + self.fail("todo") self.assertRaises(...) - Class level repository which is considered read-only as it is shared among all test cases in your type. + Access it using:: - self.rorepo # 'ro' stands for read-only + + self.rorepo # 'ro' stands for read-only The rorepo is in fact your current project's git repo. If you refer to specific - shas for your objects, be sure you choose some that are part of the immutable portion - of the project history ( to assure tests don't fail for others ). + shas for your objects, be sure you choose some that are part of the immutable + portion of the project history (so that tests don't fail for others). 
""" def _small_repo_url(self): - """:return" a path to a small, clonable repository""" + """:return: A path to a small, clonable repository""" from git.cmd import Git return Git.polish_url(osp.join(self.rorepo.working_tree_dir, "git/ext/gitdb/gitdb/ext/smmap")) @classmethod def setUpClass(cls): - """ - Dynamically add a read-only repository to our actual type. This way - each test type has its own repository + """Dynamically add a read-only repository to our actual type. + + This way, each test type has its own repository. """ from git import Repo @@ -393,11 +388,63 @@ class TestBase(TestCase): def _make_file(self, rela_path, data, repo=None): """ - Create a file at the given path relative to our repository, filled - with the given data. Returns absolute path to created file. + Create a file at the given path relative to our repository, filled with the + given data. + + :return: An absolute path to the created file. """ repo = repo or self.rorepo abs_path = osp.join(repo.working_tree_dir, rela_path) with open(abs_path, "w") as fp: fp.write(data) return abs_path + + +class VirtualEnvironment: + """A newly created Python virtual environment for use in a test.""" + + __slots__ = ("_env_dir",) + + def __init__(self, env_dir, *, with_pip): + if sys.platform == "win32": + self._env_dir = osp.realpath(env_dir) + venv.create(self.env_dir, symlinks=False, with_pip=with_pip) + else: + self._env_dir = env_dir + venv.create(self.env_dir, symlinks=True, with_pip=with_pip) + + if with_pip: + # The upgrade_deps parameter to venv.create is 3.9+ only, so do it this way. 
+ command = [self.python, "-m", "pip", "install", "--upgrade", "pip"] + if sys.version_info < (3, 12): + command.append("setuptools") + subprocess.check_output(command) + + @property + def env_dir(self): + """The top-level directory of the environment.""" + return self._env_dir + + @property + def python(self): + """Path to the Python executable in the environment.""" + return self._executable("python") + + @property + def pip(self): + """Path to the pip executable in the environment, or RuntimeError if absent.""" + return self._executable("pip") + + @property + def sources(self): + """Path to a src directory in the environment, which may not exist yet.""" + return os.path.join(self.env_dir, "src") + + def _executable(self, basename): + if sys.platform == "win32": + path = osp.join(self.env_dir, "Scripts", basename + ".exe") + else: + path = osp.join(self.env_dir, "bin", basename) + if osp.isfile(path) or osp.islink(path): + return path + raise RuntimeError(f"no regular file or symlink {path!r}") diff --git a/test/performance/__init__.py b/test/performance/__init__.py index e69de29..56b5d89 100644 --- a/test/performance/__init__.py +++ b/test/performance/__init__.py @@ -0,0 +1,2 @@ +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ diff --git a/test/performance/lib.py b/test/performance/lib.py index c793d77..c245999 100644 --- a/test/performance/lib.py +++ b/test/performance/lib.py @@ -1,13 +1,18 @@ -"""Contains library functions""" +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ + +"""Support library for performance tests.""" + import logging import os +import os.path as osp import tempfile from git import Repo from git.db import GitCmdObjectDB, GitDB -from test.lib import TestBase from git.util import rmtree -import os.path as osp + +from test.lib import TestBase # { Invariants @@ -20,36 +25,28 @@ 
k_env_git_repo = "GIT_PYTHON_TEST_GIT_REPO_BASE" class TestBigRepoR(TestBase): - """TestCase providing access to readonly 'big' repositories using the following member variables: - * gitrorepo - - * Read-Only git repository - actually the repo of git itself - - * puregitrorepo + * gitrorepo: + Read-Only git repository - actually (by default) the repo of GitPython itself. - * As gitrepo, but uses pure python implementation + * puregitrorepo: + Like gitrorepo, but uses a pure Python implementation for its object database. """ - # { Invariants - # } END invariants - def setUp(self): - try: - super(TestBigRepoR, self).setUp() - except AttributeError: - pass + super().setUp() repo_path = os.environ.get(k_env_git_repo) if repo_path is None: logging.info( - ("You can set the %s environment variable to a .git repository of" % k_env_git_repo) - + "your choice - defaulting to the gitpython repository" + "You can set the %s environment variable to a .git repository of your" + " choice - defaulting to the GitPython repository", + k_env_git_repo, ) repo_path = osp.dirname(__file__) - # end set some repo path + # END set some repo path self.gitrorepo = Repo(repo_path, odbt=GitCmdObjectDB, search_parent_directories=True) self.puregitrorepo = Repo(repo_path, odbt=GitDB, search_parent_directories=True) @@ -61,24 +58,20 @@ class TestBigRepoR(TestBase): class TestBigRepoRW(TestBigRepoR): + """Like :class:`TestBigRepoR`, but provides a big repository that we can write to. - """As above, but provides a big repository that we can write to. - - Provides ``self.gitrwrepo`` and ``self.puregitrwrepo``""" + Provides ``self.gitrwrepo`` and ``self.puregitrwrepo``. 
+ """ def setUp(self): self.gitrwrepo = None - try: - super(TestBigRepoRW, self).setUp() - except AttributeError: - pass - dirname = tempfile.mktemp() - os.mkdir(dirname) + super().setUp() + dirname = tempfile.mkdtemp() self.gitrwrepo = self.gitrorepo.clone(dirname, shared=True, bare=True, odbt=GitCmdObjectDB) self.puregitrwrepo = Repo(dirname, odbt=GitDB) def tearDown(self): - super(TestBigRepoRW, self).tearDown() + super().tearDown() if self.gitrwrepo is not None: rmtree(self.gitrwrepo.working_dir) self.gitrwrepo.git.clear_cache() diff --git a/test/performance/test_commit.py b/test/performance/test_commit.py index dbe2ad4..b943f19 100644 --- a/test/performance/test_commit.py +++ b/test/performance/test_commit.py @@ -1,25 +1,28 @@ -# test_performance.py # Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors # -# This module is part of GitPython and is released under -# the BSD License: https://opensource.org/license/bsd-3-clause/ +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ + +"""Performance tests for commits (iteration, traversal, and serialization).""" + +import gc from io import BytesIO from time import time import sys -from .lib import TestBigRepoRW -from git import Commit from gitdb import IStream + +from git import Commit + +from test.performance.lib import TestBigRepoRW from test.test_commit import TestCommitSerialization class TestPerformance(TestBigRepoRW, TestCommitSerialization): def tearDown(self): - import gc - gc.collect() - # ref with about 100 commits in its history + # ref with about 100 commits in its history. 
ref_100 = "0.1.6" def _query_commit_info(self, c): @@ -36,9 +39,9 @@ class TestPerformance(TestBigRepoRW, TestCommitSerialization): no = 0 nc = 0 - # find the first commit containing the given path - always do a full - # iteration ( restricted to the path in question ), but in fact it should - # return quite a lot of commits, we just take one and hence abort the operation + # Find the first commit containing the given path. Always do a full iteration + # (restricted to the path in question). This should return quite a lot of + # commits. We just take one and hence abort the operation. st = time() for c in self.rorepo.iter_commits(self.ref_100): @@ -57,7 +60,7 @@ class TestPerformance(TestBigRepoRW, TestCommitSerialization): ) def test_commit_traversal(self): - # bound to cat-file parsing performance + # Bound to cat-file parsing performance. nc = 0 st = time() for c in self.gitrorepo.commit().traverse(branch_first=False): @@ -71,7 +74,7 @@ class TestPerformance(TestBigRepoRW, TestCommitSerialization): ) def test_commit_iteration(self): - # bound to stream parsing performance + # Bound to stream parsing performance. nc = 0 st = time() for c in Commit.iter_items(self.gitrorepo, self.gitrorepo.head): @@ -89,8 +92,8 @@ class TestPerformance(TestBigRepoRW, TestCommitSerialization): rwrepo = self.gitrwrepo make_object = rwrepo.odb.store - # direct serialization - deserialization can be tested afterwards - # serialization is probably limited on IO + # Direct serialization - deserialization can be tested afterwards. + # Serialization is probably limited on IO. 
hc = rwrepo.commit(rwrepo.head) nc = 5000 diff --git a/test/performance/test_odb.py b/test/performance/test_odb.py index 4208c41..fdbbeb8 100644 --- a/test/performance/test_odb.py +++ b/test/performance/test_odb.py @@ -1,8 +1,12 @@ -"""Performance tests for object store""" +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ + +"""Performance tests for object store.""" + import sys from time import time -from .lib import TestBigRepoR +from test.performance.lib import TestBigRepoR class TestObjDBPerformance(TestBigRepoR): @@ -24,7 +28,7 @@ class TestObjDBPerformance(TestBigRepoR): results[0].append(elapsed) # GET TREES - # walk all trees of all commits + # Walk all trees of all commits. st = time() blobs_per_commit = [] nt = 0 @@ -35,7 +39,7 @@ class TestObjDBPerformance(TestBigRepoR): nt += 1 if item.type == "blob": blobs.append(item) - # direct access for speed + # Direct access for speed. # END while trees are there for walking blobs_per_commit.append(blobs) # END for each commit @@ -75,7 +79,7 @@ class TestObjDBPerformance(TestBigRepoR): results[2].append(elapsed) # END for each repo type - # final results + # Final results. 
for test_name, a, b in results: print( "%s: %f s vs %f s, pure is %f times slower" % (test_name, a, b, b / a), diff --git a/test/performance/test_streams.py b/test/performance/test_streams.py index 25e0815..f6ffeba 100644 --- a/test/performance/test_streams.py +++ b/test/performance/test_streams.py @@ -1,27 +1,32 @@ -"""Performance data streaming performance""" +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ + +"""Performance tests for data streaming.""" + +import gc import os +import os.path as osp import subprocess import sys from time import time -from test.lib import with_rw_repo -from git.util import bin_to_hex from gitdb import LooseObjectDB, IStream from gitdb.test.lib import make_memory_file -import os.path as osp +from git.util import bin_to_hex -from .lib import TestBigRepoR +from test.lib import with_rw_repo +from test.performance.lib import TestBigRepoR class TestObjDBPerformance(TestBigRepoR): - large_data_size_bytes = 1000 * 1000 * 10 # some MiB should do it - moderate_data_size_bytes = 1000 * 1000 * 1 # just 1 MiB + large_data_size_bytes = 1000 * 1000 * 10 # Some MiB should do it. + moderate_data_size_bytes = 1000 * 1000 * 1 # Just 1 MiB. @with_rw_repo("HEAD", bare=True) def test_large_data_streaming(self, rwrepo): - # TODO: This part overlaps with the same file in gitdb.test.performance.test_stream - # It should be shared if possible + # TODO: This part overlaps with the same file in + # gitdb.test.performance.test_stream. It should be shared if possible. ldb = LooseObjectDB(osp.join(rwrepo.git_dir, "objects")) for randomize in range(2): @@ -32,7 +37,7 @@ class TestObjDBPerformance(TestBigRepoR): elapsed = time() - st print("Done (in %f s)" % elapsed, file=sys.stderr) - # writing - due to the compression it will seem faster than it is + # Writing - due to the compression it will seem faster than it is. 
st = time() binsha = ldb.store(IStream("blob", size, stream)).binsha elapsed_add = time() - st @@ -45,7 +50,7 @@ class TestObjDBPerformance(TestBigRepoR): msg %= (size_kib, fsize_kib, desc, elapsed_add, size_kib / elapsed_add) print(msg, file=sys.stderr) - # reading all at once + # Reading all at once. st = time() ostream = ldb.stream(binsha) shadata = ostream.read() @@ -57,7 +62,7 @@ class TestObjDBPerformance(TestBigRepoR): msg %= (size_kib, desc, elapsed_readall, size_kib / elapsed_readall) print(msg, file=sys.stderr) - # reading in chunks of 1 MiB + # Reading in chunks of 1 MiB. cs = 512 * 1000 chunks = [] st = time() @@ -86,20 +91,18 @@ class TestObjDBPerformance(TestBigRepoR): file=sys.stderr, ) - # del db file so git has something to do + # del db file so git has something to do. ostream = None - import gc - gc.collect() os.remove(db_file) # VS. CGIT ########## - # CGIT ! Can using the cgit programs be faster ? + # CGIT! Can using the cgit programs be faster? proc = rwrepo.git.hash_object("-w", "--stdin", as_process=True, istream=subprocess.PIPE) - # write file - pump everything in at once to be a fast as possible - data = stream.getvalue() # cache it + # Write file - pump everything in at once to be a fast as possible. + data = stream.getvalue() # Cache it. st = time() proc.stdin.write(data) proc.stdin.close() @@ -107,22 +110,22 @@ class TestObjDBPerformance(TestBigRepoR): proc.wait() gelapsed_add = time() - st del data - assert gitsha == bin_to_hex(binsha) # we do it the same way, right ? + assert gitsha == bin_to_hex(binsha) # We do it the same way, right? - # as its the same sha, we reuse our path + # As it's the same sha, we reuse our path. fsize_kib = osp.getsize(db_file) / 1000 msg = "Added %i KiB (filesize = %i KiB) of %s data to using git-hash-object in %f s ( %f Write KiB / s)" msg %= (size_kib, fsize_kib, desc, gelapsed_add, size_kib / gelapsed_add) print(msg, file=sys.stderr) - # compare ... + # Compare. 
print( "Git-Python is %f %% faster than git when adding big %s files" % (100.0 - (elapsed_add / gelapsed_add) * 100, desc), file=sys.stderr, ) - # read all + # Read all. st = time() _hexsha, _typename, size, data = rwrepo.git.get_object_data(gitsha) gelapsed_readall = time() - st @@ -132,14 +135,14 @@ class TestObjDBPerformance(TestBigRepoR): file=sys.stderr, ) - # compare + # Compare. print( "Git-Python is %f %% faster than git when reading big %sfiles" % (100.0 - (elapsed_readall / gelapsed_readall) * 100, desc), file=sys.stderr, ) - # read chunks + # Read chunks. st = time() _hexsha, _typename, size, stream = rwrepo.git.stream_object_data(gitsha) while True: @@ -158,7 +161,7 @@ class TestObjDBPerformance(TestBigRepoR): ) print(msg, file=sys.stderr) - # compare + # Compare. print( "Git-Python is %f %% faster than git when reading big %s files in chunks" % (100.0 - (elapsed_readchunks / gelapsed_readchunks) * 100, desc), diff --git a/test/test_actor.py b/test/test_actor.py index f495ac0..5e66357 100644 --- a/test/test_actor.py +++ b/test/test_actor.py @@ -1,12 +1,12 @@ -# test_actor.py # Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors # -# This module is part of GitPython and is released under -# the BSD License: https://opensource.org/license/bsd-3-clause/ +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ -from test.lib import TestBase from git import Actor +from test.lib import TestBase + class TestActor(TestBase): def test_from_string_should_separate_name_and_email(self): @@ -14,7 +14,7 @@ class TestActor(TestBase): self.assertEqual("Michael Trier", a.name) self.assertEqual("mtrier@example.com", a.email) - # base type capabilities + # Base type capabilities assert a == a assert not (a != a) m = set() diff --git a/test/test_base.py b/test/test_base.py index b77c811..86bcc5c 100644 --- a/test/test_base.py +++ b/test/test_base.py @@ -1,29 +1,26 @@ -# -*- 
coding: utf-8 -*- -# test_base.py # Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors # -# This module is part of GitPython and is released under -# the BSD License: https://opensource.org/license/bsd-3-clause/ +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ + +import gc import os +import os.path as osp import sys import tempfile -from unittest import SkipTest, skipIf +from unittest import skipIf from git import Repo -from git.objects import Blob, Tree, Commit, TagObject -from git.compat import is_win +from git.objects import Blob, Commit, TagObject, Tree +import git.objects.base as base from git.objects.util import get_object_type_by_name -from test.lib import TestBase as _TestBase, with_rw_repo, with_rw_and_rw_remote_repo -from git.util import hex_to_bin, HIDE_WINDOWS_FREEZE_ERRORS +from git.util import HIDE_WINDOWS_FREEZE_ERRORS, hex_to_bin -import git.objects.base as base -import os.path as osp +from test.lib import TestBase as _TestBase, with_rw_and_rw_remote_repo, with_rw_repo class TestBase(_TestBase): def tearDown(self): - import gc - gc.collect() type_tuples = ( @@ -34,7 +31,7 @@ class TestBase(_TestBase): ) def test_base_object(self): - # test interface of base object classes + # Test interface of base object classes. types = (Blob, Tree, Commit, TagObject) self.assertEqual(len(types), len(self.type_tuples)) @@ -61,25 +58,27 @@ class TestBase(_TestBase): if isinstance(item, base.IndexObject): num_index_objs += 1 - if hasattr(item, "path"): # never runs here - assert not item.path.startswith("/") # must be relative + if hasattr(item, "path"): # Never runs here. + assert not item.path.startswith("/") # Must be relative. assert isinstance(item.mode, int) # END index object check - # read from stream + # Read from stream. 
data_stream = item.data_stream data = data_stream.read() assert data - tmpfilename = tempfile.mktemp(suffix="test-stream") - with open(tmpfilename, "wb+") as tmpfile: + with tempfile.NamedTemporaryFile(suffix="test-stream", delete=False) as tmpfile: self.assertEqual(item, item.stream_data(tmpfile)) tmpfile.seek(0) self.assertEqual(tmpfile.read(), data) - os.remove(tmpfilename) + + # Remove the file this way, instead of with a context manager or "finally", + # so it is only removed on success, and we can inspect the file on failure. + os.remove(tmpfile.name) # END for each object type to create - # each has a unique sha + # Each has a unique sha. self.assertEqual(len(s), num_objs) self.assertEqual(len(s | s), num_objs) self.assertEqual(num_index_objs, 2) @@ -92,7 +91,7 @@ class TestBase(_TestBase): self.assertRaises(ValueError, get_object_type_by_name, b"doesntexist") def test_object_resolution(self): - # objects must be resolved to shas so they compare equal + # Objects must be resolved to shas so they compare equal. self.assertEqual(self.rorepo.head.reference.object, self.rorepo.active_branch.object) @with_rw_repo("HEAD", bare=True) @@ -122,24 +121,24 @@ class TestBase(_TestBase): file_path = osp.join(rw_repo.working_dir, filename) - # verify first that we could encode file name in this environment + # Verify first that we could encode file name in this environment. 
try: file_path.encode(sys.getfilesystemencoding()) except UnicodeEncodeError as e: - raise SkipTest("Environment doesn't support unicode filenames") from e + raise RuntimeError("Environment doesn't support unicode filenames") from e with open(file_path, "wb") as fp: fp.write(b"something") - if is_win: - # on windows, there is no way this works, see images on + if sys.platform == "win32": + # On Windows, there is no way this works, see images on: # https://github.com/gitpython-developers/GitPython/issues/147#issuecomment-68881897 - # Therefore, it must be added using the python implementation + # Therefore, it must be added using the Python implementation. rw_repo.index.add([file_path]) - # However, when the test winds down, rmtree fails to delete this file, which is recognized - # as ??? only. + # However, when the test winds down, rmtree fails to delete this file, which + # is recognized as ??? only. else: - # on posix, we can just add unicode files without problems + # On POSIX, we can just add Unicode files without problems. 
rw_repo.git.add(rw_repo.working_dir) - # end + rw_repo.index.commit("message") diff --git a/test/test_blob.py b/test/test_blob.py index 692522b..affaa60 100644 --- a/test/test_blob.py +++ b/test/test_blob.py @@ -1,12 +1,12 @@ -# test_blob.py # Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors # -# This module is part of GitPython and is released under -# the BSD License: https://opensource.org/license/bsd-3-clause/ +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ -from test.lib import TestBase from git import Blob +from test.lib import TestBase + class TestBlob(TestBase): def test_mime_type_should_return_mime_type_for_known_types(self): diff --git a/test/test_blob_filter.py b/test/test_blob_filter.py index cbaa30b..ddd8307 100644 --- a/test/test_blob_filter.py +++ b/test/test_blob_filter.py @@ -1,4 +1,8 @@ -"""Test the blob filter.""" +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ + +"""Tests for the blob filter.""" + from pathlib import Path from typing import Sequence, Tuple from unittest.mock import MagicMock @@ -10,14 +14,15 @@ from git.objects import Blob from git.types import PathLike -# fmt: off -@pytest.mark.parametrize('paths, path, expected_result', [ - ((Path("foo"),), Path("foo"), True), - ((Path("foo"),), Path("foo/bar"), True), - ((Path("foo/bar"),), Path("foo"), False), - ((Path("foo"), Path("bar")), Path("foo"), True), -]) -# fmt: on +@pytest.mark.parametrize( + "paths, path, expected_result", + [ + ((Path("foo"),), Path("foo"), True), + ((Path("foo"),), Path("foo/bar"), True), + ((Path("foo/bar"),), Path("foo"), False), + ((Path("foo"), Path("bar")), Path("foo"), True), + ], +) def test_blob_filter(paths: Sequence[PathLike], path: PathLike, expected_result: bool) -> None: """Test the blob filter.""" blob_filter = BlobFilter(paths) diff --git 
a/test/test_clone.py b/test/test_clone.py index 1b4a6c3..126ef00 100644 --- a/test/test_clone.py +++ b/test/test_clone.py @@ -1,16 +1,12 @@ -# -*- coding: utf-8 -*- -# This module is part of GitPython and is released under -# the BSD License: https://opensource.org/license/bsd-3-clause/ +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ from pathlib import Path import re import git -from .lib import ( - TestBase, - with_rw_directory, -) +from test.lib import TestBase, with_rw_directory class TestClone(TestBase): @@ -20,8 +16,8 @@ class TestClone(TestBase): garbage_file = non_empty_dir / "not-empty" garbage_file.write_text("Garbage!") - # Verify that cloning into the non-empty dir fails while complaining about - # the target directory not being empty/non-existent + # Verify that cloning into the non-empty dir fails while complaining about the + # target directory not being empty/non-existent. try: self.rorepo.clone(non_empty_dir) except git.GitCommandError as exc: diff --git a/test/test_commit.py b/test/test_commit.py index 527aea3..37c66e3 100644 --- a/test/test_commit.py +++ b/test/test_commit.py @@ -1,45 +1,48 @@ -# -*- coding: utf-8 -*- -# test_commit.py # Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors # -# This module is part of GitPython and is released under -# the BSD License: https://opensource.org/license/bsd-3-clause/ +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ + import copy from datetime import datetime from io import BytesIO +import os.path as osp import re import sys import time from unittest.mock import Mock -from git import ( - Commit, - Actor, -) -from git import Repo +from gitdb import IStream + +from git import Actor, Commit, Repo from git.objects.util import tzoffset, utc from git.repo.fun import touch -from test.lib import TestBase, with_rw_repo, 
fixture_path, StringProcessAdapter -from test.lib import with_rw_directory -from gitdb import IStream -import os.path as osp +from test.lib import ( + StringProcessAdapter, + TestBase, + fixture_path, + with_rw_directory, + with_rw_repo, +) class TestCommitSerialization(TestBase): def assert_commit_serialization(self, rwrepo, commit_id, print_performance_info=False): - """traverse all commits in the history of commit identified by commit_id and check - if the serialization works. - :param print_performance_info: if True, we will show how fast we are""" - ns = 0 # num serializations - nds = 0 # num deserializations + """Traverse all commits in the history of commit identified by commit_id and + check if the serialization works. + + :param print_performance_info: If True, we will show how fast we are. + """ + ns = 0 # Number of serializations. + nds = 0 # Number of deserializations. st = time.time() for cm in rwrepo.commit(commit_id).traverse(): nds += 1 - # assert that we deserialize commits correctly, hence we get the same - # sha on serialization + # Assert that we deserialize commits correctly, hence we get the same + # sha on serialization. stream = BytesIO() cm._serialize(stream) ns += 1 @@ -71,13 +74,13 @@ class TestCommitSerialization(TestBase): streamlen = stream.tell() stream.seek(0) - # reuse istream + # Reuse istream. istream.size = streamlen istream.stream = stream istream.binsha = None nc.binsha = rwrepo.odb.store(istream).binsha - # if it worked, we have exactly the same contents ! + # If it worked, we have exactly the same contents! self.assertEqual(nc.hexsha, cm.hexsha) # END check commits elapsed = time.time() - st @@ -94,7 +97,7 @@ class TestCommitSerialization(TestBase): class TestCommit(TestCommitSerialization): def test_bake(self): commit = self.rorepo.commit("2454ae89983a4496a445ce347d7a41c0bb0ea7ae") - # commits have no dict + # Commits have no dict. 
self.assertRaises(AttributeError, setattr, commit, "someattr", 1) commit.author # bake @@ -132,9 +135,12 @@ class TestCommit(TestCommitSerialization): commit = self.rorepo.commit("33ebe7acec14b25c5f84f35a664803fcab2f7781") stats = commit.stats - def check_entries(d): + def check_entries(d, has_change_type=False): assert isinstance(d, dict) - for key in ("insertions", "deletions", "lines"): + keys = ("insertions", "deletions", "lines") + if has_change_type: + keys += ("change_type",) + for key in keys: assert key in d # END assertion helper @@ -145,10 +151,10 @@ class TestCommit(TestCommitSerialization): assert "files" in stats.total for _filepath, d in stats.files.items(): - check_entries(d) + check_entries(d, True) # END for each stated file - # assure data is parsed properly + # Check that data is parsed properly. michael = Actor._from_string("Michael Trier <mtrier@gmail.com>") self.assertEqual(commit.author, michael) self.assertEqual(commit.committer, michael) @@ -162,9 +168,9 @@ class TestCommit(TestCommitSerialization): commit = self.rorepo.commit("185d847ec7647fd2642a82d9205fb3d07ea71715") files = commit.stats.files - # when a file is renamed, the output of git diff is like "dir/{old => new}" - # unless we disable rename with --no-renames, which produces two lines - # one with the old path deletes and another with the new added + # When a file is renamed, the output of git diff is like "dir/{old => new}" + # unless we disable rename with --no-renames, which produces two lines, + # one with the old path deletes and another with the new added. self.assertEqual(len(files), 2) def check_entries(path, changes): @@ -190,7 +196,7 @@ class TestCommit(TestCommitSerialization): # END for each stated file def test_unicode_actor(self): - # assure we can parse unicode actors correctly + # Check that we can parse Unicode actors correctly. 
name = "Üäöß ÄußÉ" self.assertEqual(len(name), 9) special = Actor._from_string("%s <something@this.com>" % name) @@ -205,7 +211,7 @@ class TestCommit(TestCommitSerialization): p00 = p0.parents[0] p10 = p1.parents[0] - # basic branch first, depth first + # Basic branch first, depth first. dfirst = start.traverse(branch_first=False) bfirst = start.traverse(branch_first=True) self.assertEqual(next(dfirst), p0) @@ -216,7 +222,7 @@ class TestCommit(TestCommitSerialization): self.assertEqual(next(bfirst), p00) self.assertEqual(next(bfirst), p10) - # at some point, both iterations should stop + # At some point, both iterations should stop. self.assertEqual(list(bfirst)[-1], first) stoptraverse = self.rorepo.commit("254d04aa3180eb8b8daf7b7ff25f010cd69b4e7d").traverse( @@ -235,52 +241,51 @@ class TestCommit(TestCommitSerialization): stoptraverse = self.rorepo.commit("254d04aa3180eb8b8daf7b7ff25f010cd69b4e7d").traverse(as_edge=True) self.assertEqual(len(next(stoptraverse)), 2) - # ignore self + # Ignore self self.assertEqual(next(start.traverse(ignore_self=False)), start) - # depth + # Depth self.assertEqual(len(list(start.traverse(ignore_self=False, depth=0))), 1) - # prune + # Prune self.assertEqual(next(start.traverse(branch_first=1, prune=lambda i, d: i == p0)), p1) - # predicate + # Predicate self.assertEqual(next(start.traverse(branch_first=1, predicate=lambda i, d: i == p1)), p1) - # traversal should stop when the beginning is reached + # Traversal should stop when the beginning is reached. self.assertRaises(StopIteration, next, first.traverse()) - # parents of the first commit should be empty ( as the only parent has a null - # sha ) + # Parents of the first commit should be empty (as the only parent has a null sha) self.assertEqual(len(first.parents), 0) def test_iteration(self): - # we can iterate commits + # We can iterate commits. 
all_commits = Commit.list_items(self.rorepo, self.rorepo.head) assert all_commits self.assertEqual(all_commits, list(self.rorepo.iter_commits())) - # this includes merge commits + # This includes merge commits. mcomit = self.rorepo.commit("d884adc80c80300b4cc05321494713904ef1df2d") assert mcomit in all_commits - # we can limit the result to paths + # We can limit the result to paths. ltd_commits = list(self.rorepo.iter_commits(paths="CHANGES")) assert ltd_commits and len(ltd_commits) < len(all_commits) - # show commits of multiple paths, resulting in a union of commits + # Show commits of multiple paths, resulting in a union of commits. less_ltd_commits = list(Commit.iter_items(self.rorepo, "master", paths=("CHANGES", "AUTHORS"))) assert len(ltd_commits) < len(less_ltd_commits) class Child(Commit): def __init__(self, *args, **kwargs): - super(Child, self).__init__(*args, **kwargs) + super().__init__(*args, **kwargs) child_commits = list(Child.iter_items(self.rorepo, "master", paths=("CHANGES", "AUTHORS"))) assert type(child_commits[0]) is Child def test_iter_items(self): - # pretty not allowed + # pretty not allowed. self.assertRaises(ValueError, Commit.iter_items, self.rorepo, "master", pretty="raw") def test_rev_list_bisect_all(self): @@ -311,14 +316,15 @@ class TestCommit(TestCommitSerialization): touch(path) rw_repo.index.add([path]) rw_repo.index.commit("initial commit") - list(rw_repo.iter_commits(rw_repo.head.ref)) # should fail unless bug is fixed + list(rw_repo.iter_commits(rw_repo.head.ref)) # Should fail unless bug is fixed. def test_count(self): self.assertEqual(self.rorepo.tag("refs/tags/0.1.5").commit.count(), 143) def test_list(self): - # This doesn't work anymore, as we will either attempt getattr with bytes, or compare 20 byte string - # with actual 20 byte bytes. This usage makes no sense anyway + # This doesn't work anymore, as we will either attempt getattr with bytes, or + # compare 20 byte string with actual 20 byte bytes. 
This usage makes no sense + # anyway. assert isinstance( Commit.list_items(self.rorepo, "0.1.5", max_count=5)["5117c9c8a4d3af19a9958677e45cda9269de1541"], Commit, @@ -340,7 +346,7 @@ class TestCommit(TestCommitSerialization): self.assertNotEqual(commit2, commit3) def test_iter_parents(self): - # should return all but ourselves, even if skip is defined + # Should return all but ourselves, even if skip is defined. c = self.rorepo.commit("0.1.5") for skip in (0, 1): piter = c.iter_parents(skip=skip) @@ -355,17 +361,17 @@ class TestCommit(TestCommitSerialization): @with_rw_repo("HEAD", bare=True) def test_serialization(self, rwrepo): - # create all commits of our repo + # Create all commits of our repo. self.assert_commit_serialization(rwrepo, "0.1.6") def test_serialization_unicode_support(self): self.assertEqual(Commit.default_encoding.lower(), "utf-8") - # create a commit with unicode in the message, and the author's name - # Verify its serialization and deserialization + # Create a commit with Unicode in the message, and the author's name. + # Verify its serialization and deserialization. cmt = self.rorepo.commit("0.1.6") - assert isinstance(cmt.message, str) # it automatically decodes it as such - assert isinstance(cmt.author.name, str) # same here + assert isinstance(cmt.message, str) # It automatically decodes it as such. + assert isinstance(cmt.author.name, str) # Same here. cmt.message = "üäêèß" self.assertEqual(len(cmt.message), 5) @@ -383,8 +389,8 @@ class TestCommit(TestCommitSerialization): self.assertEqual(cmt.author.name, ncmt.author.name) self.assertEqual(cmt.message, ncmt.message) - # actually, it can't be printed in a shell as repr wants to have ascii only - # it appears + # Actually, it can't be printed in a shell as repr wants to have ascii only it + # appears. 
cmt.author.__repr__() def test_invalid_commit(self): @@ -475,7 +481,7 @@ JzJMZDRLQLFvnzqZuCjE commit.authored_datetime, datetime(2009, 10, 8, 18, 17, 5, tzinfo=tzoffset(-7200)), commit.authored_datetime, - ) # noqa + ) self.assertEqual( commit.authored_datetime, datetime(2009, 10, 8, 16, 17, 5, tzinfo=utc), @@ -498,14 +504,15 @@ JzJMZDRLQLFvnzqZuCjE KEY_2 = "Key" VALUE_2 = "Value with inner spaces" - # Check the following trailer example is extracted from multiple msg variations + # Check that the following trailer example is extracted from multiple msg + # variations. TRAILER = f"{KEY_1}: {VALUE_1_1}\n{KEY_2}: {VALUE_2}\n{KEY_1}: {VALUE_1_2}" msgs = [ f"Subject\n\n{TRAILER}\n", f"Subject\n \nSome body of a function\n \n{TRAILER}\n", f"Subject\n \nSome body of a function\n\nnon-key: non-value\n\n{TRAILER}\n", ( - # check when trailer has inconsistent whitespace + # Check when trailer has inconsistent whitespace. f"Subject\n \nSome multiline\n body of a function\n\nnon-key: non-value\n\n" f"{KEY_1}:{VALUE_1_1}\n{KEY_2} : {VALUE_2}\n{KEY_1}: {VALUE_1_2}\n" ), @@ -523,7 +530,7 @@ JzJMZDRLQLFvnzqZuCjE KEY_2: [VALUE_2], } - # check that trailer stays empty for multiple msg combinations + # Check that the trailer stays empty for multiple msg combinations. msgs = [ "Subject\n", "Subject\n\nBody with some\nText\n", @@ -539,7 +546,7 @@ JzJMZDRLQLFvnzqZuCjE assert commit.trailers_list == [] assert commit.trailers_dict == {} - # check that only the last key value paragraph is evaluated + # Check that only the last key value paragraph is evaluated. 
commit = copy.copy(self.rorepo.commit("master")) commit.message = f"Subject\n\nMultiline\nBody\n\n{KEY_1}: {VALUE_1_1}\n\n{KEY_2}: {VALUE_2}\n" assert commit.trailers_list == [(KEY_2, VALUE_2)] diff --git a/test/test_config.py b/test/test_config.py index 481e129..9299742 100644 --- a/test/test_config.py +++ b/test/test_config.py @@ -1,26 +1,22 @@ -# test_config.py # Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors # -# This module is part of GitPython and is released under -# the BSD License: https://opensource.org/license/bsd-3-clause/ +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ import glob import io import os +import os.path as osp +import sys from unittest import mock +import pytest + from git import GitConfigParser from git.config import _OMD, cp -from test.lib import ( - TestCase, - fixture_path, - SkipTest, -) -from test.lib import with_rw_directory - -import os.path as osp from git.util import rmfile +from test.lib import SkipTest, TestCase, fixture_path, with_rw_directory _tc_lock_fpaths = osp.join(osp.dirname(__file__), "fixtures/*.lock") @@ -46,29 +42,30 @@ class TestBase(TestCase): return sio def test_read_write(self): - # writer must create the exact same file as the one read before + # The writer must create the exact same file as the one read before. for filename in ("git_config", "git_config_global"): file_obj = self._to_memcache(fixture_path(filename)) with GitConfigParser(file_obj, read_only=False) as w_config: - w_config.read() # enforce reading + w_config.read() # Enforce reading. assert w_config._sections - w_config.write() # enforce writing + w_config.write() # Enforce writing. - # we stripped lines when reading, so the results differ + # We stripped lines when reading, so the results differ. 
assert file_obj.getvalue() self.assertEqual( file_obj.getvalue(), self._to_memcache(fixture_path(filename)).getvalue(), ) - # creating an additional config writer must fail due to exclusive access + # Creating an additional config writer must fail due to exclusive + # access. with self.assertRaises(IOError): GitConfigParser(file_obj, read_only=False) - # should still have a lock and be able to make changes + # Should still have a lock and be able to make changes. assert w_config._lock._has_lock() - # changes should be written right away + # Changes should be written right away. sname = "my_section" oname = "mykey" val = "myvalue" @@ -93,13 +90,14 @@ class TestBase(TestCase): def test_includes_order(self): with GitConfigParser(list(map(fixture_path, ("git_config", "git_config_global")))) as r_config: - r_config.read() # enforce reading - # Simple inclusions, again checking them taking precedence + r_config.read() # Enforce reading. + # Simple inclusions, again checking them taking precedence. assert r_config.get_value("sec", "var0") == "value0_included" - # This one should take the git_config_global value since included - # values must be considered as soon as they get them + # This one should take the git_config_global value since included values + # must be considered as soon as they get them. assert r_config.get_value("diff", "tool") == "meld" try: + # FIXME: Split this assertion out somehow and mark it xfail (or fix it). assert r_config.get_value("sec", "var1") == "value1_main" except AssertionError as e: raise SkipTest("Known failure -- included values are not in effect right away") from e @@ -110,16 +108,17 @@ class TestBase(TestCase): gcp = GitConfigParser(fpl, read_only=False) with gcp as cw: cw.set_value("include", "some_value", "a") - # entering again locks the file again... + # Entering again locks the file again... 
with gcp as cw: cw.set_value("include", "some_other_value", "b") - # ...so creating an additional config writer must fail due to exclusive access + # ...so creating an additional config writer must fail due to exclusive + # access. with self.assertRaises(IOError): GitConfigParser(fpl, read_only=False) # but work when the lock is removed with GitConfigParser(fpl, read_only=False): assert osp.exists(fpl) - # reentering with an existing lock must fail due to exclusive access + # Reentering with an existing lock must fail due to exclusive access. with self.assertRaises(IOError): gcp.__enter__() @@ -129,7 +128,7 @@ class TestBase(TestCase): ev = "ruby -e '\n" ev += " system %(git), %(merge-file), %(--marker-size=%L), %(%A), %(%O), %(%B)\n" ev += " b = File.read(%(%A))\n" - ev += " b.sub!(/^<+ .*\\nActiveRecord::Schema\\.define.:version => (\\d+). do\\n=+\\nActiveRecord::Schema\\." # noqa E501 + ev += " b.sub!(/^<+ .*\\nActiveRecord::Schema\\.define.:version => (\\d+). do\\n=+\\nActiveRecord::Schema\\." # noqa: E501 ev += "define.:version => (\\d+). do\\n>+ .*/) do\n" ev += " %(ActiveRecord::Schema.define(:version => #{[$1, $2].max}) do)\n" ev += " end\n" @@ -143,6 +142,14 @@ class TestBase(TestCase): ) self.assertEqual(len(config.sections()), 23) + def test_config_value_with_trailing_new_line(self): + config_content = b'[section-header]\nkey:"value\n"' + config_file = io.BytesIO(config_content) + config_file.name = "multiline_value.config" + + git_config = GitConfigParser(config_file) + git_config.read() # This should not throw an exception + def test_base(self): path_repo = fixture_path("git_config") path_global = fixture_path("git_config_global") @@ -151,7 +158,7 @@ class TestBase(TestCase): num_sections = 0 num_options = 0 - # test reader methods + # Test reader methods. 
assert r_config._is_initialized is False for section in r_config.sections(): num_sections += 1 @@ -164,7 +171,7 @@ class TestBase(TestCase): assert "\n" not in option assert "\n" not in val - # writing must fail + # Writing must fail. with self.assertRaises(IOError): r_config.set(section, option, None) with self.assertRaises(IOError): @@ -176,11 +183,11 @@ class TestBase(TestCase): assert num_sections and num_options assert r_config._is_initialized is True - # get value which doesn't exist, with default + # Get value which doesn't exist, with default. default = "my default value" assert r_config.get_value("doesnt", "exist", default) == default - # it raises if there is no default though + # It raises if there is no default though. with self.assertRaises(cp.NoSectionError): r_config.get_value("doesnt", "exist") @@ -189,13 +196,9 @@ class TestBase(TestCase): def write_test_value(cw, value): cw.set_value(value, "value", value) - # end - def check_test_value(cr, value): assert cr.get_value(value, "value") == value - # end - # PREPARE CONFIG FILE A fpa = osp.join(rw_dir, "a") with GitConfigParser(fpa, read_only=False) as cw: @@ -224,10 +227,10 @@ class TestBase(TestCase): with GitConfigParser(fpa, read_only=True) as cr: for tv in ("a", "b", "c"): check_test_value(cr, tv) - # end for each test to verify + # END for each test to verify assert len(cr.items("include")) == 8, "Expected all include sections to be merged" - # test writable config writers - assure write-back doesn't involve includes + # Test writable config writers - assure write-back doesn't involve includes. with GitConfigParser(fpa, read_only=False, merge_includes=True) as cw: tv = "x" write_test_value(cw, tv) @@ -236,20 +239,25 @@ class TestBase(TestCase): with self.assertRaises(cp.NoSectionError): check_test_value(cr, tv) - # But can make it skip includes altogether, and thus allow write-backs + # But can make it skip includes altogether, and thus allow write-backs. 
with GitConfigParser(fpa, read_only=False, merge_includes=False) as cw: write_test_value(cw, tv) with GitConfigParser(fpa, read_only=True) as cr: check_test_value(cr, tv) + @pytest.mark.xfail( + sys.platform == "win32", + reason='Second config._has_includes() assertion fails (for "config is included if path is matching git_dir")', + raises=AssertionError, + ) @with_rw_directory def test_conditional_includes_from_git_dir(self, rw_dir): - # Initiate repository path + # Initiate repository path. git_dir = osp.join(rw_dir, "target1", "repo1") os.makedirs(git_dir) - # Initiate mocked repository + # Initiate mocked repository. repo = mock.Mock(git_dir=git_dir) # Initiate config files. @@ -312,11 +320,11 @@ class TestBase(TestCase): @with_rw_directory def test_conditional_includes_from_branch_name(self, rw_dir): - # Initiate mocked branch + # Initiate mocked branch. branch = mock.Mock() type(branch).name = mock.PropertyMock(return_value="/foo/branch") - # Initiate mocked repository + # Initiate mocked repository. repo = mock.Mock(active_branch=branch) # Initiate config files. 
diff --git a/test/test_db.py b/test/test_db.py index ebf73b5..72d63b4 100644 --- a/test/test_db.py +++ b/test/test_db.py @@ -1,26 +1,27 @@ -# test_repo.py # Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors # -# This module is part of GitPython and is released under -# the BSD License: https://opensource.org/license/bsd-3-clause/ +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ + +import os.path as osp + from git.db import GitCmdObjectDB from git.exc import BadObject -from test.lib import TestBase from git.util import bin_to_hex -import os.path as osp +from test.lib import TestBase class TestDB(TestBase): def test_base(self): gdb = GitCmdObjectDB(osp.join(self.rorepo.git_dir, "objects"), self.rorepo.git) - # partial to complete - works with everything + # Partial to complete - works with everything. hexsha = bin_to_hex(gdb.partial_to_complete_sha_hex("0.1.6")) assert len(hexsha) == 40 assert bin_to_hex(gdb.partial_to_complete_sha_hex(hexsha[:20])) == hexsha - # fails with BadObject + # Fails with BadObject. 
for invalid_rev in ("0000", "bad/ref", "super bad"): self.assertRaises(BadObject, gdb.partial_to_complete_sha_hex, invalid_rev) diff --git a/test/test_diff.py b/test/test_diff.py index 5aa4408..612fbd9 100644 --- a/test/test_diff.py +++ b/test/test_diff.py @@ -1,29 +1,21 @@ -# coding: utf-8 -# test_diff.py # Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors # -# This module is part of GitPython and is released under -# the BSD License: https://opensource.org/license/bsd-3-clause/ -import ddt +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ + +import gc +import os.path as osp import shutil +import sys import tempfile -from git import ( - Repo, - GitCommandError, - Diff, - DiffIndex, - NULL_TREE, - Submodule, -) + +import ddt +import pytest + +from git import NULL_TREE, Diff, DiffIndex, Diffable, GitCommandError, Repo, Submodule from git.cmd import Git -from test.lib import ( - TestBase, - StringProcessAdapter, - fixture, -) -from test.lib import with_rw_directory -import os.path as osp +from test.lib import StringProcessAdapter, TestBase, fixture, with_rw_directory def to_raw(input): @@ -37,14 +29,12 @@ class TestDiff(TestBase): self.submodule_dir = tempfile.mkdtemp() def tearDown(self): - import gc - gc.collect() shutil.rmtree(self.repo_dir) shutil.rmtree(self.submodule_dir) def _assert_diff_format(self, diffs): - # verify that the format of the diff is sane + # Verify that the format of the diff is sane. 
for diff in diffs: if diff.a_mode: assert isinstance(diff.a_mode, int) @@ -60,7 +50,7 @@ class TestDiff(TestBase): @with_rw_directory def test_diff_with_staged_file(self, rw_dir): - # SETUP INDEX WITH MULTIPLE STAGES + # SET UP INDEX WITH MULTIPLE STAGES r = Repo.init(rw_dir) fp = osp.join(rw_dir, "hello.txt") with open(fp, "w") as fs: @@ -88,11 +78,11 @@ class TestDiff(TestBase): fs.write("Hallo Welt") r.git.commit(all=True, message="change on topic branch") - # there must be a merge-conflict + # There must be a merge conflict. with self.assertRaises(GitCommandError): r.git.cherry_pick("master") - # Now do the actual testing - this should just work + # Now do the actual testing - this should just work. self.assertEqual(len(r.index.diff(None)), 2) self.assertEqual( @@ -222,7 +212,7 @@ class TestDiff(TestBase): for dr in res: self.assertTrue(dr.diff.startswith(b"@@"), dr) self.assertIsNotNone(str(dr), "Diff to string conversion should be possible") - # end for each diff + # END for each diff dr = res[3] assert dr.diff.endswith(b"+Binary files a/rps and b/rps differ\n") @@ -255,7 +245,7 @@ class TestDiff(TestBase): self.assertIsNotNone(diff_index[0].new_file) self.assertEqual(diff_index[0].diff, "") - # ...and with creating a patch + # ...and with creating a patch. diff_index = initial_commit.diff(NULL_TREE, create_patch=True) self.assertIsNone(diff_index[0].a_path, repr(diff_index[0].a_path)) self.assertEqual(diff_index[0].b_path, "CHANGES", repr(diff_index[0].b_path)) @@ -282,18 +272,18 @@ class TestDiff(TestBase): self.assertEqual(res[10].b_rawpath, b"path/\x80-invalid-unicode-path.txt") # The "Moves" - # NOTE: The path prefixes a/ and b/ here are legit! We're actually - # verifying that it's not "a/a/" that shows up, see the fixture data. - self.assertEqual(res[11].a_path, "a/with spaces") # NOTE: path a/ here legit! - self.assertEqual(res[11].b_path, "b/with some spaces") # NOTE: path b/ here legit! + # NOTE: The path prefixes "a/" and "b/" here are legit! 
We're actually verifying + # that it's not "a/a/" that shows up; see the fixture data. + self.assertEqual(res[11].a_path, "a/with spaces") # NOTE: path "a/"" legit! + self.assertEqual(res[11].b_path, "b/with some spaces") # NOTE: path "b/"" legit! self.assertEqual(res[12].a_path, "a/ending in a space ") self.assertEqual(res[12].b_path, "b/ending with space ") self.assertEqual(res[13].a_path, 'a/"with-quotes"') self.assertEqual(res[13].b_path, 'b/"with even more quotes"') def test_diff_patch_format(self): - # test all of the 'old' format diffs for completeness - it should at least - # be able to deal with it + # Test all of the 'old' format diffs for completeness - it should at least be + # able to deal with it. fixtures = ( "diff_2", "diff_2f", @@ -319,16 +309,21 @@ class TestDiff(TestBase): self.assertIsNone(diff_index[0].a_path, repr(diff_index[0].a_path)) self.assertEqual(diff_index[0].b_path, "file with spaces", repr(diff_index[0].b_path)) + @pytest.mark.xfail( + sys.platform == "win32", + reason='"Access is denied" when tearDown calls shutil.rmtree', + raises=PermissionError, + ) def test_diff_submodule(self): """Test that diff is able to correctly diff commits that cover submodule changes""" - # Init a temp git repo that will be referenced as a submodule + # Init a temp git repo that will be referenced as a submodule. sub = Repo.init(self.submodule_dir) with open(self.submodule_dir + "/subfile", "w") as sub_subfile: sub_subfile.write("") sub.index.add(["subfile"]) sub.index.commit("first commit") - # Init a temp git repo that will incorporate the submodule + # Init a temp git repo that will incorporate the submodule. repo = Repo.init(self.repo_dir) with open(self.repo_dir + "/test", "w") as foo_test: foo_test.write("") @@ -337,7 +332,7 @@ class TestDiff(TestBase): repo.index.commit("first commit") repo.create_tag("1") - # Add a commit to the submodule + # Add a commit to the submodule. 
submodule = repo.submodule("subtest") with open(self.repo_dir + "/sub/subfile", "w") as foo_sub_subfile: foo_sub_subfile.write("blub") @@ -345,19 +340,20 @@ class TestDiff(TestBase): submodule.module().index.commit("changed subfile") submodule.binsha = submodule.module().head.commit.binsha - # Commit submodule updates in parent repo + # Commit submodule updates in parent repo. repo.index.add([submodule]) repo.index.commit("submodule changed") repo.create_tag("2") diff = repo.commit("1").diff(repo.commit("2"))[0] - # If diff is unable to find the commit hashes (looks in wrong repo) the *_blob.size - # property will be a string containing exception text, an int indicates success + # If diff is unable to find the commit hashes (looks in wrong repo) the + # *_blob.size property will be a string containing exception text, an int + # indicates success. self.assertIsInstance(diff.a_blob.size, int) self.assertIsInstance(diff.b_blob.size, int) def test_diff_interface(self): - # test a few variations of the main diff routine + """Test a few variations of the main diff routine.""" assertion_map = {} for i, commit in enumerate(self.rorepo.iter_commits("0.1.6", max_count=2)): diff_item = commit @@ -365,7 +361,7 @@ class TestDiff(TestBase): diff_item = commit.tree # END use tree every second item - for other in (None, NULL_TREE, commit.Index, commit.parents[0]): + for other in (None, NULL_TREE, commit.INDEX, commit.parents[0]): for paths in (None, "CHANGES", ("CHANGES", "lib")): for create_patch in range(2): diff_index = diff_item.diff(other=other, paths=paths, create_patch=create_patch) @@ -379,7 +375,7 @@ class TestDiff(TestBase): assertion_map[key] = assertion_map[key] + len(list(diff_index.iter_change_type(ct))) # END for each changetype - # check entries + # Check entries. 
diff_set = set() diff_set.add(diff_index[0]) diff_set.add(diff_index[0]) @@ -398,24 +394,36 @@ class TestDiff(TestBase): # END for each other side # END for each commit - # assert we could always find at least one instance of the members we - # can iterate in the diff index - if not this indicates its not working correctly - # or our test does not span the whole range of possibilities + # Assert that we could always find at least one instance of the members we can + # iterate in the diff index - if not this indicates its not working correctly or + # our test does not span the whole range of possibilities. for key, value in assertion_map.items(): self.assertIsNotNone(value, "Did not find diff for %s" % key) # END for each iteration type - # test path not existing in the index - should be ignored + # Test path not existing in the index - should be ignored. c = self.rorepo.head.commit cp = c.parents[0] diff_index = c.diff(cp, ["does/not/exist"]) self.assertEqual(len(diff_index), 0) + def test_diff_interface_stability(self): + """Test that the Diffable.Index redefinition should not break compatibility.""" + self.assertIs( + Diffable.Index, + Diffable.INDEX, + "The old and new class attribute names must be aliases.", + ) + self.assertIs( + type(Diffable.INDEX).__eq__, + object.__eq__, + "Equality comparison must be reference-based.", + ) + @with_rw_directory def test_rename_override(self, rw_dir): - """Test disabling of diff rename detection""" - - # create and commit file_a.txt + """Test disabling of diff rename detection.""" + # Create and commit file_a.txt. repo = Repo.init(rw_dir) file_a = osp.join(rw_dir, "file_a.txt") with open(file_a, "w", encoding="utf-8") as outfile: @@ -423,10 +431,10 @@ class TestDiff(TestBase): repo.git.add(Git.polish_url(file_a)) repo.git.commit(message="Added file_a.txt") - # remove file_a.txt + # Remove file_a.txt. 
repo.git.rm(Git.polish_url(file_a)) - # create and commit file_b.txt with similarity index of 52 + # Create and commit file_b.txt with similarity index of 52. file_b = osp.join(rw_dir, "file_b.txt") with open(file_b, "w", encoding="utf-8") as outfile: outfile.write("hello world\nhello world") @@ -436,7 +444,7 @@ class TestDiff(TestBase): commit_a = repo.commit("HEAD") commit_b = repo.commit("HEAD~1") - # check default diff command with renamed files enabled + # Check default diff command with renamed files enabled. diffs = commit_b.diff(commit_a) self.assertEqual(1, len(diffs)) diff = diffs[0] @@ -444,38 +452,99 @@ class TestDiff(TestBase): self.assertEqual("file_a.txt", diff.rename_from) self.assertEqual("file_b.txt", diff.rename_to) - # check diff with rename files disabled + # Check diff with rename files disabled. diffs = commit_b.diff(commit_a, no_renames=True) self.assertEqual(2, len(diffs)) - # check fileA.txt deleted + # Check fileA.txt deleted. diff = diffs[0] self.assertEqual(True, diff.deleted_file) self.assertEqual("file_a.txt", diff.a_path) - # check fileB.txt added + # Check fileB.txt added. diff = diffs[1] self.assertEqual(True, diff.new_file) self.assertEqual("file_b.txt", diff.a_path) - # check diff with high similarity index + # Check diff with high similarity index. diffs = commit_b.diff(commit_a, split_single_char_options=False, M="75%") self.assertEqual(2, len(diffs)) - # check fileA.txt deleted + # Check fileA.txt deleted. diff = diffs[0] self.assertEqual(True, diff.deleted_file) self.assertEqual("file_a.txt", diff.a_path) - # check fileB.txt added + # Check fileB.txt added. diff = diffs[1] self.assertEqual(True, diff.new_file) self.assertEqual("file_b.txt", diff.a_path) - # check diff with low similarity index + # Check diff with low similarity index. 
diffs = commit_b.diff(commit_a, split_single_char_options=False, M="40%") self.assertEqual(1, len(diffs)) diff = diffs[0] self.assertEqual(True, diff.renamed_file) self.assertEqual("file_a.txt", diff.rename_from) self.assertEqual("file_b.txt", diff.rename_to) + + @with_rw_directory + def test_diff_patch_with_external_engine(self, rw_dir): + repo = Repo.init(rw_dir) + gitignore = osp.join(rw_dir, ".gitignore") + + # First commit + with open(gitignore, "w") as f: + f.write("first_line\n") + repo.git.add(".gitignore") + repo.index.commit("first commit") + + # Adding second line and committing + with open(gitignore, "a") as f: + f.write("second_line\n") + repo.git.add(".gitignore") + repo.index.commit("second commit") + + # Adding third line and staging + with open(gitignore, "a") as f: + f.write("third_line\n") + repo.git.add(".gitignore") + + # Adding fourth line + with open(gitignore, "a") as f: + f.write("fourth_line\n") + + # Set the external diff engine + with repo.config_writer(config_level="repository") as writer: + writer.set_value("diff", "external", "bogus_diff_engine") + + head_against_head = repo.head.commit.diff("HEAD^", create_patch=True) + self.assertEqual(len(head_against_head), 1) + head_against_index = repo.head.commit.diff(create_patch=True) + self.assertEqual(len(head_against_index), 1) + head_against_working_tree = repo.head.commit.diff(None, create_patch=True) + self.assertEqual(len(head_against_working_tree), 1) + + index_against_head = repo.index.diff("HEAD", create_patch=True) + self.assertEqual(len(index_against_head), 1) + index_against_working_tree = repo.index.diff(None, create_patch=True) + self.assertEqual(len(index_against_working_tree), 1) + + @with_rw_directory + def test_beginning_space(self, rw_dir): + # Create a file beginning by a whitespace + repo = Repo.init(rw_dir) + file = osp.join(rw_dir, " file.txt") + with open(file, "w") as f: + f.write("hello world") + repo.git.add(Git.polish_url(file)) + repo.index.commit("first commit") 
+ + # Diff the commit with an empty tree + # and check the paths + diff_index = repo.head.commit.diff(NULL_TREE) + d = diff_index[0] + a_path = d.a_path + b_path = d.b_path + self.assertEqual(a_path, " file.txt") + self.assertEqual(b_path, " file.txt") diff --git a/test/test_docs.py b/test/test_docs.py index 79e1f1b..cc0bbf2 100644 --- a/test/test_docs.py +++ b/test/test_docs.py @@ -1,10 +1,11 @@ -# -*- coding: utf-8 -*- -# test_git.py # Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors # -# This module is part of GitPython and is released under -# the BSD License: https://opensource.org/license/bsd-3-clause/ +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ + +import gc import os +import os.path import sys import pytest @@ -12,26 +13,26 @@ import pytest from test.lib import TestBase from test.lib.helper import with_rw_directory -import os.path - class Tutorials(TestBase): def tearDown(self): - import gc - gc.collect() - # @skipIf(HIDE_WINDOWS_KNOWN_ERRORS, ## ACTUALLY skipped by `git.submodule.base#L869`. + # ACTUALLY skipped by git.util.rmtree (in local onerror function), from the last + # call to it via git.objects.submodule.base.Submodule.remove + # (at "handle separate bare repository"), line 1062. 
+ # + # @skipIf(HIDE_WINDOWS_KNOWN_ERRORS, # "FIXME: helper.wrapper fails with: PermissionError: [WinError 5] Access is denied: " - # "'C:\\Users\\appveyor\\AppData\\Local\\Temp\\1\\test_work_tree_unsupportedryfa60di\\master_repo\\.git\\objects\\pack\\pack-bc9e0787aef9f69e1591ef38ea0a6f566ec66fe3.idx") # noqa E501 + # "'C:\\Users\\appveyor\\AppData\\Local\\Temp\\1\\test_work_tree_unsupportedryfa60di\\master_repo\\.git\\objects\\pack\\pack-bc9e0787aef9f69e1591ef38ea0a6f566ec66fe3.idx") # noqa: E501 @with_rw_directory def test_init_repo_object(self, rw_dir): # [1-test_init_repo_object] from git import Repo # rorepo is a Repo instance pointing to the git-python repository. - # For all you know, the first argument to Repo is a path to the repository - # you want to work with + # For all you know, the first argument to Repo is a path to the repository you + # want to work with. repo = Repo(self.rorepo.working_tree_dir) assert not repo.bare # ![1-test_init_repo_object] @@ -42,20 +43,20 @@ class Tutorials(TestBase): # ![2-test_init_repo_object] # [3-test_init_repo_object] - repo.config_reader() # get a config reader for read-only access - with repo.config_writer(): # get a config writer to change configuration - pass # call release() to be sure changes are written and locks are released + repo.config_reader() # Get a config reader for read-only access. + with repo.config_writer(): # Get a config writer to change configuration. + pass # Call release() to be sure changes are written and locks are released. # ![3-test_init_repo_object] # [4-test_init_repo_object] - assert not bare_repo.is_dirty() # check the dirty state - repo.untracked_files # retrieve a list of untracked files + assert not bare_repo.is_dirty() # Check the dirty state. + repo.untracked_files # Retrieve a list of untracked files. 
# ['my_untracked_file'] # ![4-test_init_repo_object] # [5-test_init_repo_object] cloned_repo = repo.clone(os.path.join(rw_dir, "to/this/path")) - assert cloned_repo.__class__ is Repo # clone an existing repository + assert cloned_repo.__class__ is Repo # Clone an existing repository. assert Repo.init(os.path.join(rw_dir, "path/for/new/repo")).__class__ is Repo # ![5-test_init_repo_object] @@ -66,9 +67,9 @@ class Tutorials(TestBase): # repository paths # [7-test_init_repo_object] - assert os.path.isdir(cloned_repo.working_tree_dir) # directory with your work files - assert cloned_repo.git_dir.startswith(cloned_repo.working_tree_dir) # directory containing the git repository - assert bare_repo.working_tree_dir is None # bare repositories have no working tree + assert os.path.isdir(cloned_repo.working_tree_dir) # Directory with your work files. + assert cloned_repo.git_dir.startswith(cloned_repo.working_tree_dir) # Directory containing the git repository. + assert bare_repo.working_tree_dir is None # Bare repositories have no working tree. # ![7-test_init_repo_object] # heads, tags and references @@ -76,59 +77,59 @@ class Tutorials(TestBase): # [8-test_init_repo_object] self.assertEqual( repo.head.ref, - repo.heads.master, # head is a sym-ref pointing to master + repo.heads.master, # head is a sym-ref pointing to master. "It's ok if TC not running from `master`.", ) - self.assertEqual(repo.tags["0.3.5"], repo.tag("refs/tags/0.3.5")) # you can access tags in various ways too - self.assertEqual(repo.refs.master, repo.heads["master"]) # .refs provides all refs, ie heads ... + self.assertEqual(repo.tags["0.3.5"], repo.tag("refs/tags/0.3.5")) # You can access tags in various ways too. + self.assertEqual(repo.refs.master, repo.heads["master"]) # .refs provides all refs, i.e. heads... if "TRAVIS" not in os.environ: self.assertEqual(repo.refs["origin/master"], repo.remotes.origin.refs.master) # ... remotes ... - self.assertEqual(repo.refs["0.3.5"], repo.tags["0.3.5"]) # ... 
and tags + self.assertEqual(repo.refs["0.3.5"], repo.tags["0.3.5"]) # ... and tags. # ![8-test_init_repo_object] - # create a new head/branch + # Create a new head/branch. # [9-test_init_repo_object] - new_branch = cloned_repo.create_head("feature") # create a new branch ... + new_branch = cloned_repo.create_head("feature") # Create a new branch ... assert cloned_repo.active_branch != new_branch # which wasn't checked out yet ... - self.assertEqual(new_branch.commit, cloned_repo.active_branch.commit) # pointing to the checked-out commit - # It's easy to let a branch point to the previous commit, without affecting anything else - # Each reference provides access to the git object it points to, usually commits + self.assertEqual(new_branch.commit, cloned_repo.active_branch.commit) # pointing to the checked-out commit. + # It's easy to let a branch point to the previous commit, without affecting anything else. + # Each reference provides access to the git object it points to, usually commits. assert new_branch.set_commit("HEAD~1").commit == cloned_repo.active_branch.commit.parents[0] # ![9-test_init_repo_object] - # create a new tag reference + # Create a new tag reference. # [10-test_init_repo_object] past = cloned_repo.create_tag( "past", ref=new_branch, message="This is a tag-object pointing to %s" % new_branch.name, ) - self.assertEqual(past.commit, new_branch.commit) # the tag points to the specified commit - assert past.tag.message.startswith("This is") # and its object carries the message provided + self.assertEqual(past.commit, new_branch.commit) # The tag points to the specified commit + assert past.tag.message.startswith("This is") # and its object carries the message provided. - now = cloned_repo.create_tag("now") # This is a tag-reference. It may not carry meta-data + now = cloned_repo.create_tag("now") # This is a tag-reference. It may not carry meta-data. 
assert now.tag is None # ![10-test_init_repo_object] # Object handling # [11-test_init_repo_object] assert now.commit.message != past.commit.message - # You can read objects directly through binary streams, no working tree required + # You can read objects directly through binary streams, no working tree required. assert (now.commit.tree / "VERSION").data_stream.read().decode("ascii").startswith("3") - # You can traverse trees as well to handle all contained files of a particular commit + # You can traverse trees as well to handle all contained files of a particular commit. file_count = 0 tree_count = 0 tree = past.commit.tree for item in tree.traverse(): file_count += item.type == "blob" tree_count += item.type == "tree" - assert file_count and tree_count # we have accumulated all directories and files - self.assertEqual(len(tree.blobs) + len(tree.trees), len(tree)) # a tree is iterable on its children + assert file_count and tree_count # We have accumulated all directories and files. + self.assertEqual(len(tree.blobs) + len(tree.trees), len(tree)) # A tree is iterable on its children. # ![11-test_init_repo_object] - # remotes allow handling push, pull and fetch operations + # Remotes allow handling push, pull and fetch operations. # [12-test_init_repo_object] from git import RemoteProgress @@ -142,69 +143,67 @@ class Tutorials(TestBase): message or "NO MESSAGE", ) - # end - - self.assertEqual(len(cloned_repo.remotes), 1) # we have been cloned, so should be one remote - self.assertEqual(len(bare_repo.remotes), 0) # this one was just initialized + self.assertEqual(len(cloned_repo.remotes), 1) # We have been cloned, so should be one remote. + self.assertEqual(len(bare_repo.remotes), 0) # This one was just initialized. 
origin = bare_repo.create_remote("origin", url=cloned_repo.working_tree_dir) assert origin.exists() for fetch_info in origin.fetch(progress=MyProgressPrinter()): print("Updated %s to %s" % (fetch_info.ref, fetch_info.commit)) - # create a local branch at the latest fetched master. We specify the name statically, but you have all - # information to do it programmatically as well. + # Create a local branch at the latest fetched master. We specify the name + # statically, but you have all information to do it programmatically as well. bare_master = bare_repo.create_head("master", origin.refs.master) bare_repo.head.set_reference(bare_master) assert not bare_repo.delete_remote(origin).exists() - # push and pull behave very similarly + # push and pull behave very similarly. # ![12-test_init_repo_object] # index # [13-test_init_repo_object] - self.assertEqual(new_branch.checkout(), cloned_repo.active_branch) # checking out branch adjusts the wtree - self.assertEqual(new_branch.commit, past.commit) # Now the past is checked out + self.assertEqual(new_branch.checkout(), cloned_repo.active_branch) # Checking out branch adjusts the wtree. + self.assertEqual(new_branch.commit, past.commit) # Now the past is checked out. new_file_path = os.path.join(cloned_repo.working_tree_dir, "my-new-file") - open(new_file_path, "wb").close() # create new file in working tree - cloned_repo.index.add([new_file_path]) # add it to the index - # Commit the changes to deviate masters history + open(new_file_path, "wb").close() # Create new file in working tree. + cloned_repo.index.add([new_file_path]) # Add it to the index. + # Commit the changes to deviate masters history. 
cloned_repo.index.commit("Added a new file in the past - for later merge") - # prepare a merge - master = cloned_repo.heads.master # right-hand side is ahead of us, in the future - merge_base = cloned_repo.merge_base(new_branch, master) # allows for a three-way merge - cloned_repo.index.merge_tree(master, base=merge_base) # write the merge result into index + # Prepare a merge. + master = cloned_repo.heads.master # Right-hand side is ahead of us, in the future. + merge_base = cloned_repo.merge_base(new_branch, master) # Allows for a three-way merge. + cloned_repo.index.merge_tree(master, base=merge_base) # Write the merge result into index. cloned_repo.index.commit( "Merged past and now into future ;)", parent_commits=(new_branch.commit, master.commit), ) - # now new_branch is ahead of master, which probably should be checked out and reset softly. - # note that all these operations didn't touch the working tree, as we managed it ourselves. - # This definitely requires you to know what you are doing :) ! - assert os.path.basename(new_file_path) in new_branch.commit.tree # new file is now in tree - master.commit = new_branch.commit # let master point to most recent commit - cloned_repo.head.reference = master # we adjusted just the reference, not the working tree or index + # Now new_branch is ahead of master, which probably should be checked out and reset softly. + # Note that all these operations didn't touch the working tree, as we managed it ourselves. + # This definitely requires you to know what you are doing! :) + assert os.path.basename(new_file_path) in new_branch.commit.tree # New file is now in tree. + master.commit = new_branch.commit # Let master point to most recent commit. + cloned_repo.head.reference = master # We adjusted just the reference, not the working tree or index. 
# ![13-test_init_repo_object] # submodules # [14-test_init_repo_object] - # create a new submodule and check it out on the spot, setup to track master branch of `bare_repo` - # As our GitPython repository has submodules already that point to GitHub, make sure we don't - # interact with them + # Create a new submodule and check it out on the spot, setup to track master + # branch of `bare_repo`. As our GitPython repository has submodules already that + # point to GitHub, make sure we don't interact with them. for sm in cloned_repo.submodules: assert not sm.remove().exists() # after removal, the sm doesn't exist anymore sm = cloned_repo.create_submodule("mysubrepo", "path/to/subrepo", url=bare_repo.git_dir, branch="master") - # .gitmodules was written and added to the index, which is now being committed + # .gitmodules was written and added to the index, which is now being committed. cloned_repo.index.commit("Added submodule") - assert sm.exists() and sm.module_exists() # this submodule is definitely available - sm.remove(module=True, configuration=False) # remove the working tree - assert sm.exists() and not sm.module_exists() # the submodule itself is still available + assert sm.exists() and sm.module_exists() # This submodule is definitely available. + sm.remove(module=True, configuration=False) # Remove the working tree. + assert sm.exists() and not sm.module_exists() # The submodule itself is still available. - # update all submodules, non-recursively to save time, this method is very powerful, go have a look + # Update all submodules, non-recursively to save time. This method is very powerful, go have a look. cloned_repo.submodule_update(recursive=False) - assert sm.module_exists() # The submodules working tree was checked out by update + assert sm.module_exists() # The submodule's working tree was checked out by update. 
# ![14-test_init_repo_object] @with_rw_directory @@ -215,25 +214,25 @@ class Tutorials(TestBase): repo = git.Repo.clone_from(self._small_repo_url(), os.path.join(rw_dir, "repo"), branch="master") heads = repo.heads - master = heads.master # lists can be accessed by name for convenience - master.commit # the commit pointed to by head called master - master.rename("new_name") # rename heads + master = heads.master # Lists can be accessed by name for convenience. + master.commit # the commit pointed to by head called master. + master.rename("new_name") # Rename heads. master.rename("master") # ![1-test_references_and_objects] # [2-test_references_and_objects] tags = repo.tags tagref = tags[0] - tagref.tag # tags may have tag objects carrying additional information - tagref.commit # but they always point to commits - repo.delete_tag(tagref) # delete or - repo.create_tag("my_tag") # create tags using the repo for convenience + tagref.tag # Tags may have tag objects carrying additional information + tagref.commit # but they always point to commits. + repo.delete_tag(tagref) # Delete or + repo.create_tag("my_tag") # create tags using the repo for convenience. # ![2-test_references_and_objects] # [3-test_references_and_objects] - head = repo.head # the head points to the active branch/ref - master = head.reference # retrieve the reference the head points to - master.commit # from here you use it as any other reference + head = repo.head # The head points to the active branch/ref. + master = head.reference # Retrieve the reference the head points to. + master.commit # From here you use it as any other reference. 
# ![3-test_references_and_objects] # # [4-test_references_and_objects] @@ -243,14 +242,14 @@ class Tutorials(TestBase): # ![4-test_references_and_objects] # [5-test_references_and_objects] - new_branch = repo.create_head("new") # create a new one - new_branch.commit = "HEAD~10" # set branch to another commit without changing index or working trees - repo.delete_head(new_branch) # delete an existing head - only works if it is not checked out + new_branch = repo.create_head("new") # Create a new one. + new_branch.commit = "HEAD~10" # Set branch to another commit without changing index or working trees. + repo.delete_head(new_branch) # Delete an existing head - only works if it is not checked out. # ![5-test_references_and_objects] # [6-test_references_and_objects] new_tag = repo.create_tag("my_new_tag", message="my message") - # You cannot change the commit a tag points to. Tags need to be re-created + # You cannot change the commit a tag points to. Tags need to be re-created. self.assertRaises(AttributeError, setattr, new_tag, "commit", repo.commit("HEAD~1")) repo.delete_tag(new_tag) # ![6-test_references_and_objects] @@ -269,22 +268,22 @@ class Tutorials(TestBase): # ![8-test_references_and_objects] # [9-test_references_and_objects] - self.assertEqual(hct.type, "tree") # preset string type, being a class attribute + self.assertEqual(hct.type, "tree") # Preset string type, being a class attribute. assert hct.size > 0 # size in bytes assert len(hct.hexsha) == 40 assert len(hct.binsha) == 20 # ![9-test_references_and_objects] # [10-test_references_and_objects] - self.assertEqual(hct.path, "") # root tree has no path - assert hct.trees[0].path != "" # the first contained item has one though - self.assertEqual(hct.mode, 0o40000) # trees have the mode of a linux directory - self.assertEqual(hct.blobs[0].mode, 0o100644) # blobs have specific mode, comparable to a standard linux fs + self.assertEqual(hct.path, "") # Root tree has no path. 
+ assert hct.trees[0].path != "" # The first contained item has one though. + self.assertEqual(hct.mode, 0o40000) # Trees have the mode of a Linux directory. + self.assertEqual(hct.blobs[0].mode, 0o100644) # Blobs have specific mode, comparable to a standard Linux fs. # ![10-test_references_and_objects] # [11-test_references_and_objects] - hct.blobs[0].data_stream.read() # stream object to read data from - hct.blobs[0].stream_data(open(os.path.join(rw_dir, "blob_data"), "wb")) # write data to given stream + hct.blobs[0].data_stream.read() # Stream object to read data from. + hct.blobs[0].stream_data(open(os.path.join(rw_dir, "blob_data"), "wb")) # Write data to a given stream. # ![11-test_references_and_objects] # [12-test_references_and_objects] @@ -296,7 +295,7 @@ class Tutorials(TestBase): # [13-test_references_and_objects] fifty_first_commits = list(repo.iter_commits("master", max_count=50)) assert len(fifty_first_commits) == 50 - # this will return commits 21-30 from the commit list as traversed backwards master + # This will return commits 21-30 from the commit list as traversed backwards master. ten_commits_past_twenty = list(repo.iter_commits("master", max_count=10, skip=20)) assert len(ten_commits_past_twenty) == 10 assert fifty_first_commits[20:30] == ten_commits_past_twenty @@ -331,20 +330,20 @@ class Tutorials(TestBase): # ![17-test_references_and_objects] # [18-test_references_and_objects] - assert len(tree.trees) > 0 # trees are subdirectories - assert len(tree.blobs) > 0 # blobs are files + assert len(tree.trees) > 0 # Trees are subdirectories. + assert len(tree.blobs) > 0 # Blobs are files. assert len(tree.blobs) + len(tree.trees) == len(tree) # ![18-test_references_and_objects] # [19-test_references_and_objects] - self.assertEqual(tree["smmap"], tree / "smmap") # access by index and by sub-path - for entry in tree: # intuitive iteration of tree members + self.assertEqual(tree["smmap"], tree / "smmap") # Access by index and by sub-path. 
+ for entry in tree: # Intuitive iteration of tree members. print(entry) - blob = tree.trees[1].blobs[0] # let's get a blob in a sub-tree + blob = tree.trees[1].blobs[0] # Let's get a blob in a sub-tree. assert blob.name assert len(blob.path) < len(blob.abspath) - self.assertEqual(tree.trees[1].name + "/" + blob.name, blob.path) # this is how relative blob path generated - self.assertEqual(tree[blob.path], blob) # you can use paths like 'dir/file' in tree + self.assertEqual(tree.trees[1].name + "/" + blob.name, blob.path) # This is how relative blob path generated. + self.assertEqual(tree[blob.path], blob) # You can use paths like 'dir/file' in tree, # ![19-test_references_and_objects] # [20-test_references_and_objects] @@ -353,11 +352,11 @@ class Tutorials(TestBase): # ![20-test_references_and_objects] # [21-test_references_and_objects] - # This example shows the various types of allowed ref-specs + # This example shows the various types of allowed ref-specs. assert repo.tree() == repo.head.commit.tree past = repo.commit("HEAD~5") assert repo.tree(past) == repo.tree(past.hexsha) - self.assertEqual(repo.tree("v0.8.1").type, "tree") # yes, you can provide any refspec - works everywhere + self.assertEqual(repo.tree("v0.8.1").type, "tree") # Yes, you can provide any refspec - works everywhere. # ![21-test_references_and_objects] # [22-test_references_and_objects] @@ -366,36 +365,36 @@ class Tutorials(TestBase): # [23-test_references_and_objects] index = repo.index - # The index contains all blobs in a flat list + # The index contains all blobs in a flat list. assert len(list(index.iter_blobs())) == len([o for o in repo.head.commit.tree.traverse() if o.type == "blob"]) - # Access blob objects + # Access blob objects. 
for (_path, _stage), _entry in index.entries.items(): pass new_file_path = os.path.join(repo.working_tree_dir, "new-file-name") open(new_file_path, "w").close() - index.add([new_file_path]) # add a new file to the index - index.remove(["LICENSE"]) # remove an existing one - assert os.path.isfile(os.path.join(repo.working_tree_dir, "LICENSE")) # working tree is untouched + index.add([new_file_path]) # Add a new file to the index. + index.remove(["LICENSE"]) # Remove an existing one. + assert os.path.isfile(os.path.join(repo.working_tree_dir, "LICENSE")) # Working tree is untouched. - self.assertEqual(index.commit("my commit message").type, "commit") # commit changed index - repo.active_branch.commit = repo.commit("HEAD~1") # forget last commit + self.assertEqual(index.commit("my commit message").type, "commit") # Commit changed index. + repo.active_branch.commit = repo.commit("HEAD~1") # Forget last commit. from git import Actor author = Actor("An author", "author@example.com") committer = Actor("A committer", "committer@example.com") - # commit by commit message and author and committer + # Commit with a commit message, author, and committer. index.commit("my commit message", author=author, committer=committer) # ![23-test_references_and_objects] # [24-test_references_and_objects] from git import IndexFile - # loads a tree into a temporary index, which exists just in memory + # Load a tree into a temporary index, which exists just in memory. IndexFile.from_tree(repo, "HEAD~1") - # merge two trees three-way into memory + # Merge two trees three-way into memory... merge_index = IndexFile.from_tree(repo, "HEAD~10", "HEAD", repo.merge_base("HEAD~10", "HEAD")) - # and persist it + # ...and persist it. 
merge_index.write(os.path.join(rw_dir, "merged_index")) # ![24-test_references_and_objects] @@ -404,20 +403,20 @@ class Tutorials(TestBase): origin = empty_repo.create_remote("origin", repo.remotes.origin.url) assert origin.exists() assert origin == empty_repo.remotes.origin == empty_repo.remotes["origin"] - origin.fetch() # assure we actually have data. fetch() returns useful information - # Setup a local tracking branch of a remote branch - empty_repo.create_head("master", origin.refs.master) # create local branch "master" from remote "master" - empty_repo.heads.master.set_tracking_branch(origin.refs.master) # set local "master" to track remote "master - empty_repo.heads.master.checkout() # checkout local "master" to working tree + origin.fetch() # Ensure we actually have data. fetch() returns useful information. + # Set up a local tracking branch of a remote branch. + empty_repo.create_head("master", origin.refs.master) # Create local branch "master" from remote "master". + empty_repo.heads.master.set_tracking_branch(origin.refs.master) # Set local "master" to track remote "master. + empty_repo.heads.master.checkout() # Check out local "master" to working tree. # Three above commands in one: empty_repo.create_head("master", origin.refs.master).set_tracking_branch(origin.refs.master).checkout() - # rename remotes + # Rename remotes. origin.rename("new_origin") - # push and pull behaves similarly to `git push|pull` + # Push and pull behaves similarly to `git push|pull`. origin.pull() - origin.push() # attempt push, ignore errors - origin.push().raise_if_error() # push and raise error if it fails - # assert not empty_repo.delete_remote(origin).exists() # create and delete remotes + origin.push() # Attempt push, ignore errors. + origin.push().raise_if_error() # Push and raise error if it fails. + # assert not empty_repo.delete_remote(origin).exists() # Create and delete remotes. 
# ![25-test_references_and_objects] # [26-test_references_and_objects] @@ -425,20 +424,20 @@ class Tutorials(TestBase): with origin.config_writer as cw: cw.set("pushurl", "other_url") - # Please note that in python 2, writing origin.config_writer.set(...) is totally safe. - # In py3 __del__ calls can be delayed, thus not writing changes in time. + # Please note that in Python 2, writing origin.config_writer.set(...) is totally + # safe. In py3 __del__ calls can be delayed, thus not writing changes in time. # ![26-test_references_and_objects] # [27-test_references_and_objects] hcommit = repo.head.commit - hcommit.diff() # diff tree against index - hcommit.diff("HEAD~1") # diff tree against previous tree - hcommit.diff(None) # diff tree against working tree + hcommit.diff() # diff tree against index. + hcommit.diff("HEAD~1") # diff tree against previous tree. + hcommit.diff(None) # diff tree against working tree. index = repo.index - index.diff() # diff index against itself yielding empty diff - index.diff(None) # diff index against working copy - index.diff("HEAD") # diff index against current HEAD tree + index.diff() # diff index against itself yielding empty diff. + index.diff(None) # diff index against working copy. + index.diff("HEAD") # diff index against current HEAD tree. # ![27-test_references_and_objects] # [28-test_references_and_objects] @@ -448,32 +447,33 @@ class Tutorials(TestBase): # ![28-test_references_and_objects] # [29-test_references_and_objects] - # Reset our working tree 10 commits into the past + # Reset our working tree 10 commits into the past. past_branch = repo.create_head("past_branch", "HEAD~10") repo.head.reference = past_branch assert not repo.head.is_detached - # reset the index and working tree to match the pointed-to commit + # Reset the index and working tree to match the pointed-to commit. 
repo.head.reset(index=True, working_tree=True) - # To detach your head, you have to point to a commit directly + # To detach your head, you have to point to a commit directly. repo.head.reference = repo.commit("HEAD~5") assert repo.head.is_detached - # now our head points 15 commits into the past, whereas the working tree - # and index are 10 commits in the past + # Now our head points 15 commits into the past, whereas the working tree + # and index are 10 commits in the past. # ![29-test_references_and_objects] # [30-test_references_and_objects] - # checkout the branch using git-checkout. It will fail as the working tree appears dirty + # Check out the branch using git-checkout. + # It will fail as the working tree appears dirty. self.assertRaises(git.GitCommandError, repo.heads.master.checkout) repo.heads.past_branch.checkout() # ![30-test_references_and_objects] # [31-test_references_and_objects] - git = repo.git - git.checkout("HEAD", b="my_new_branch") # create a new branch - git.branch("another-new-one") - git.branch("-D", "another-new-one") # pass strings for full control over argument order - git.for_each_ref() # '-' becomes '_' when calling it + git_cmd = repo.git + git_cmd.checkout("HEAD", b="my_new_branch") # Create a new branch. + git_cmd.branch("another-new-one") + git_cmd.branch("-D", "another-new-one") # Pass strings for full control over argument order. + git_cmd.for_each_ref() # '-' becomes '_' when calling it. # ![31-test_references_and_objects] repo.git.clear_cache() @@ -490,19 +490,19 @@ class Tutorials(TestBase): assert len(sms) == 1 sm = sms[0] - self.assertEqual(sm.name, "gitdb") # git-python has gitdb as single submodule ... - self.assertEqual(sm.children()[0].name, "smmap") # ... which has smmap as single submodule + self.assertEqual(sm.name, "gitdb") # GitPython has gitdb as its one and only (direct) submodule... + self.assertEqual(sm.children()[0].name, "smmap") # ...which has smmap as its one and only submodule. 
- # The module is the repository referenced by the submodule - assert sm.module_exists() # the module is available, which doesn't have to be the case. + # The module is the repository referenced by the submodule. + assert sm.module_exists() # The module is available, which doesn't have to be the case. assert sm.module().working_tree_dir.endswith("gitdb") - # the submodule's absolute path is the module's path + # The submodule's absolute path is the module's path. assert sm.abspath == sm.module().working_tree_dir - self.assertEqual(len(sm.hexsha), 40) # Its sha defines the commit to checkout - assert sm.exists() # yes, this submodule is valid and exists - # read its configuration conveniently + self.assertEqual(len(sm.hexsha), 40) # Its sha defines the commit to check out. + assert sm.exists() # Yes, this submodule is valid and exists. + # Read its configuration conveniently. assert sm.config_reader().get_value("path") == sm.path - self.assertEqual(len(sm.children()), 1) # query the submodule hierarchy + self.assertEqual(len(sm.children()), 1) # Query the submodule hierarchy. # ![1-test_submodules] @with_rw_directory @@ -513,7 +513,7 @@ class Tutorials(TestBase): file_name = os.path.join(repo_dir, "new-file") r = git.Repo.init(repo_dir) - # This function just creates an empty file ... + # This function just creates an empty file. 
open(file_name, "wb").close() r.index.add([file_name]) r.index.commit("initial commit") diff --git a/test/test_exc.py b/test/test_exc.py index 9e125d2..2e979f5 100644 --- a/test/test_exc.py +++ b/test/test_exc.py @@ -1,14 +1,13 @@ -# -*- coding: utf-8 -*- -# test_exc.py # Copyright (C) 2008, 2009, 2016 Michael Trier (mtrier@gmail.com) and contributors # -# This module is part of GitPython and is released under -# the BSD License: https://opensource.org/license/bsd-3-clause/ - +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ +from itertools import product import re import ddt + from git.exc import ( InvalidGitRepositoryError, WorkTreeRepositoryUnsupported, @@ -23,9 +22,8 @@ from git.exc import ( RepositoryDirtyError, ) from git.util import remove_password_if_present -from test.lib import TestBase -import itertools as itt +from test.lib import TestBase _cmd_argvs = ( @@ -43,18 +41,18 @@ _cmd_argvs = ( ), ) _causes_n_substrings = ( - (None, None), # noqa: E241 @IgnorePep8 - (7, "exit code(7)"), # noqa: E241 @IgnorePep8 - ("Some string", "'Some string'"), # noqa: E241 @IgnorePep8 - ("παλιο string", "'παλιο string'"), # noqa: E241 @IgnorePep8 - (Exception("An exc."), "Exception('An exc.')"), # noqa: E241 @IgnorePep8 - (Exception("Κακια exc."), "Exception('Κακια exc.')"), # noqa: E241 @IgnorePep8 - (object(), "<object object at "), # noqa: E241 @IgnorePep8 + (None, None), + (7, "exit code(7)"), + ("Some string", "'Some string'"), + ("παλιο string", "'παλιο string'"), + (Exception("An exc."), "Exception('An exc.')"), + (Exception("Κακια exc."), "Exception('Κακια exc.')"), + (object(), "<object object at "), ) _streams_n_substrings = ( None, - "steram", + "stream", "ομορφο stream", ) @@ -82,7 +80,7 @@ class TExc(TestBase): for ex_class in exception_classes: self.assertTrue(issubclass(ex_class, GitError)) - @ddt.data(*list(itt.product(_cmd_argvs, _causes_n_substrings, _streams_n_substrings))) 
+ @ddt.data(*list(product(_cmd_argvs, _causes_n_substrings, _streams_n_substrings))) def test_CommandError_unicode(self, case): argv, (cause, subs), stream = case cls = CommandError @@ -102,7 +100,7 @@ class TExc(TestBase): if subs is not None: # Substrings (must) already contain opening `'`. - subs = "(?<!')%s(?!')" % re.escape(subs) + subs = r"(?<!')%s(?!')" % re.escape(subs) self.assertRegex(s, subs) if not stream: diff --git a/test/test_fun.py b/test/test_fun.py index d76e189..b8593b4 100644 --- a/test/test_fun.py +++ b/test/test_fun.py @@ -1,26 +1,27 @@ +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ + from io import BytesIO -from stat import S_IFDIR, S_IFREG, S_IFLNK, S_IXUSR +from stat import S_IFDIR, S_IFLNK, S_IFREG, S_IXUSR from os import stat import os.path as osp -from unittest import SkipTest + +from gitdb.base import IStream +from gitdb.typ import str_tree_type from git import Git from git.index import IndexFile -from git.index.fun import ( - aggressive_tree_merge, - stat_mode_to_index_mode, -) +from git.index.fun import aggressive_tree_merge, stat_mode_to_index_mode from git.objects.fun import ( traverse_tree_recursive, traverse_trees_recursive, - tree_to_stream, tree_entries_from_data, + tree_to_stream, ) from git.repo.fun import find_worktree_git_dir -from test.lib import TestBase, with_rw_repo, with_rw_directory from git.util import bin_to_hex, cygpath, join_path_native -from gitdb.base import IStream -from gitdb.typ import str_tree_type + +from test.lib import TestBase, with_rw_directory, with_rw_repo class TestFun(TestBase): @@ -33,21 +34,22 @@ class TestFun(TestBase): # END assert entry matches fully def test_aggressive_tree_merge(self): - # head tree with additions, removals and modification compared to its predecessor + # Head tree with additions, removals and modification compared to its + # predecessor. 
odb = self.rorepo.odb HC = self.rorepo.commit("6c1faef799095f3990e9970bc2cb10aa0221cf9c") H = HC.tree B = HC.parents[0].tree - # entries from single tree + # Entries from single tree. trees = [H.binsha] self._assert_index_entries(aggressive_tree_merge(odb, trees), trees) - # from multiple trees + # From multiple trees. trees = [B.binsha, H.binsha] self._assert_index_entries(aggressive_tree_merge(odb, trees), trees) - # three way, no conflict + # Three way, no conflict. tree = self.rorepo.tree B = tree("35a09c0534e89b2d43ec4101a5fb54576b577905") H = tree("4fe5cfa0e063a8d51a1eb6f014e2aaa994e5e7d4") @@ -55,18 +57,18 @@ class TestFun(TestBase): trees = [B.binsha, H.binsha, M.binsha] self._assert_index_entries(aggressive_tree_merge(odb, trees), trees) - # three-way, conflict in at least one file, both modified + # Three-way, conflict in at least one file, both modified. B = tree("a7a4388eeaa4b6b94192dce67257a34c4a6cbd26") H = tree("f9cec00938d9059882bb8eabdaf2f775943e00e5") M = tree("44a601a068f4f543f73fd9c49e264c931b1e1652") trees = [B.binsha, H.binsha, M.binsha] self._assert_index_entries(aggressive_tree_merge(odb, trees), trees) - # too many trees + # Too many trees. self.assertRaises(ValueError, aggressive_tree_merge, odb, trees * 2) def mktree(self, odb, entries): - """create a tree from the given tree entries and safe it to the database""" + """Create a tree from the given tree entries and safe it to the database.""" sio = BytesIO() tree_to_stream(entries, sio.write) sio.seek(0) @@ -93,84 +95,84 @@ class TestFun(TestBase): odb = rwrepo.odb - # base tree + # Base tree. bfn = "basefile" fbase = mkfile(bfn, shaa) tb = mktree(odb, [fbase]) - # non-conflicting new files, same data + # Non-conflicting new files, same data. fa = mkfile("1", shab) th = mktree(odb, [fbase, fa]) fb = mkfile("2", shac) tm = mktree(odb, [fbase, fb]) - # two new files, same base file + # Two new files, same base file. 
trees = [tb, th, tm] assert_entries(aggressive_tree_merge(odb, trees), 3) - # both delete same file, add own one + # Both delete same file, add own one. fa = mkfile("1", shab) th = mktree(odb, [fa]) fb = mkfile("2", shac) tm = mktree(odb, [fb]) - # two new files + # Two new files. trees = [tb, th, tm] assert_entries(aggressive_tree_merge(odb, trees), 2) - # same file added in both, differently + # Same file added in both, differently. fa = mkfile("1", shab) th = mktree(odb, [fa]) fb = mkfile("1", shac) tm = mktree(odb, [fb]) - # expect conflict + # Expect conflict. trees = [tb, th, tm] assert_entries(aggressive_tree_merge(odb, trees), 2, True) - # same file added, different mode + # Same file added, different mode. fa = mkfile("1", shab) th = mktree(odb, [fa]) fb = mkcommit("1", shab) tm = mktree(odb, [fb]) - # expect conflict + # Expect conflict. trees = [tb, th, tm] assert_entries(aggressive_tree_merge(odb, trees), 2, True) - # same file added in both + # Same file added in both. fa = mkfile("1", shab) th = mktree(odb, [fa]) fb = mkfile("1", shab) tm = mktree(odb, [fb]) - # expect conflict + # Expect conflict. trees = [tb, th, tm] assert_entries(aggressive_tree_merge(odb, trees), 1) - # modify same base file, differently + # Modify same base file, differently. fa = mkfile(bfn, shab) th = mktree(odb, [fa]) fb = mkfile(bfn, shac) tm = mktree(odb, [fb]) - # conflict, 3 versions on 3 stages + # Conflict, 3 versions on 3 stages. trees = [tb, th, tm] assert_entries(aggressive_tree_merge(odb, trees), 3, True) - # change mode on same base file, by making one a commit, the other executable - # no content change ( this is totally unlikely to happen in the real world ) + # Change mode on same base file, by making one a commit, the other executable, + # no content change (this is totally unlikely to happen in the real world). 
fa = mkcommit(bfn, shaa) th = mktree(odb, [fa]) fb = mkfile(bfn, shaa, executable=1) tm = mktree(odb, [fb]) - # conflict, 3 versions on 3 stages, because of different mode + # Conflict, 3 versions on 3 stages, because of different mode. trees = [tb, th, tm] assert_entries(aggressive_tree_merge(odb, trees), 3, True) for is_them in range(2): - # only we/they change contents + # Only we/they change contents. fa = mkfile(bfn, shab) th = mktree(odb, [fa]) @@ -180,7 +182,7 @@ class TestFun(TestBase): entries = aggressive_tree_merge(odb, trees) assert len(entries) == 1 and entries[0].binsha == shab - # only we/they change the mode + # Only we/they change the mode. fa = mkcommit(bfn, shaa) th = mktree(odb, [fa]) @@ -190,14 +192,14 @@ class TestFun(TestBase): entries = aggressive_tree_merge(odb, trees) assert len(entries) == 1 and entries[0].binsha == shaa and entries[0].mode == fa[1] - # one side deletes, the other changes = conflict + # One side deletes, the other changes = conflict. fa = mkfile(bfn, shab) th = mktree(odb, [fa]) tm = mktree(odb, []) trees = [tb, th, tm] if is_them: trees = [tb, tm, th] - # as one is deleted, there are only 2 entries + # As one is deleted, there are only 2 entries. assert_entries(aggressive_tree_merge(odb, trees), 2, True) # END handle ours, theirs @@ -228,19 +230,19 @@ class TestFun(TestBase): assert len(entry) == num_trees paths = {e[2] for e in entry if e} - # only one path per set of entries + # Only one path per set of entries. assert len(paths) == 1 # END verify entry def test_tree_traversal(self): - # low level tree tarversal + # Low level tree traversal. 
odb = self.rorepo.odb H = self.rorepo.tree("29eb123beb1c55e5db4aa652d843adccbd09ae18") # head tree M = self.rorepo.tree("e14e3f143e7260de9581aee27e5a9b2645db72de") # merge tree B = self.rorepo.tree("f606937a7a21237c866efafcad33675e6539c103") # base tree B_old = self.rorepo.tree("1f66cfbbce58b4b552b041707a12d437cc5f400a") # old base tree - # two very different trees + # Two very different trees. entries = traverse_trees_recursive(odb, [B_old.binsha, H.binsha], "") self._assert_tree_entries(entries, 2) @@ -248,17 +250,17 @@ class TestFun(TestBase): assert len(oentries) == len(entries) self._assert_tree_entries(oentries, 2) - # single tree + # Single tree. is_no_tree = lambda i, d: i.type != "tree" entries = traverse_trees_recursive(odb, [B.binsha], "") assert len(entries) == len(list(B.traverse(predicate=is_no_tree))) self._assert_tree_entries(entries, 1) - # two trees + # Two trees. entries = traverse_trees_recursive(odb, [B.binsha, H.binsha], "") self._assert_tree_entries(entries, 2) - # tree trees + # Three trees. 
entries = traverse_trees_recursive(odb, [B.binsha, H.binsha, M.binsha], "") self._assert_tree_entries(entries, 3) @@ -276,10 +278,10 @@ class TestFun(TestBase): @with_rw_directory def test_linked_worktree_traversal(self, rw_dir): - """Check that we can identify a linked worktree based on a .git file""" + """Check that we can identify a linked worktree based on a .git file.""" git = Git(rw_dir) if git.version_info[:3] < (2, 5, 1): - raise SkipTest("worktree feature unsupported") + raise RuntimeError("worktree feature unsupported (test needs git 2.5.1 or later)") rw_master = self.rorepo.clone(join_path_native(rw_dir, "master_repo")) branch = rw_master.create_head("aaaaaaaa") diff --git a/test/test_git.py b/test/test_git.py index 4813095..94e68ec 100644 --- a/test/test_git.py +++ b/test/test_git.py @@ -1,37 +1,117 @@ -# -*- coding: utf-8 -*- -# test_git.py # Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors # -# This module is part of GitPython and is released under -# the BSD License: https://opensource.org/license/bsd-3-clause/ +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ + +import contextlib +import gc +import inspect +import logging import os +import os.path as osp +from pathlib import Path +import pickle +import re import shutil import subprocess import sys -from tempfile import TemporaryDirectory, TemporaryFile -from unittest import mock, skipUnless +import tempfile +from unittest import skipUnless -from git import Git, refresh, GitCommandError, GitCommandNotFound, Repo, cmd -from test.lib import TestBase, fixture_path -from test.lib import with_rw_directory -from git.util import cwd, finalize_process - -import os.path as osp +if sys.version_info >= (3, 8): + from unittest import mock +else: + import mock # To be able to examine call_args.kwargs on a mock. 
-from git.compat import is_win +import ddt +from git import Git, GitCommandError, GitCommandNotFound, Repo, cmd, refresh +from git.util import cwd, finalize_process +from test.lib import TestBase, fixture_path, with_rw_directory + + +@contextlib.contextmanager +def _patch_out_env(name): + try: + old_value = os.environ[name] + except KeyError: + old_value = None + else: + del os.environ[name] + try: + yield + finally: + if old_value is not None: + os.environ[name] = old_value + + +@contextlib.contextmanager +def _rollback_refresh(): + old_git_executable = Git.GIT_PYTHON_GIT_EXECUTABLE + + if old_git_executable is None: + raise RuntimeError("no executable string (need initial refresh before test)") + + try: + yield old_git_executable # Provide the old value for convenience. + finally: + # The cleanup refresh should always raise an exception if it fails, since if it + # fails then previously discovered test results could be misleading and, more + # importantly, subsequent tests may be unable to run or give misleading results. + # So pre-set a non-None value, so that the cleanup will be a "second" refresh. + # This covers cases where a test has set it to None to test a "first" refresh. + Git.GIT_PYTHON_GIT_EXECUTABLE = Git.git_exec_name + + # Do the cleanup refresh. This sets Git.GIT_PYTHON_GIT_EXECUTABLE to old_value + # in most cases. The reason to call it is to achieve other associated state + # changes as well, which include updating attributes of the FetchInfo class. 
+ refresh() + + +@contextlib.contextmanager +def _fake_git(*version_info): + fake_version = ".".join(map(str, version_info)) + fake_output = f"git version {fake_version} (fake)" + + with tempfile.TemporaryDirectory() as tdir: + if sys.platform == "win32": + fake_git = Path(tdir, "fake-git.cmd") + script = f"@echo {fake_output}\n" + fake_git.write_text(script, encoding="utf-8") + else: + fake_git = Path(tdir, "fake-git") + script = f"#!/bin/sh\necho '{fake_output}'\n" + fake_git.write_text(script, encoding="utf-8") + fake_git.chmod(0o755) + + yield str(fake_git.absolute()) + + +def _rename_with_stem(path, new_stem): + if sys.version_info >= (3, 9): + path.rename(path.with_stem(new_stem)) + else: + path.rename(path.with_name(new_stem + path.suffix)) + + +@ddt.ddt class TestGit(TestBase): @classmethod def setUpClass(cls): - super(TestGit, cls).setUpClass() + super().setUpClass() cls.git = Git(cls.rorepo.working_dir) def tearDown(self): - import gc - gc.collect() + def _assert_logged_for_popen(self, log_watcher, name, value): + re_name = re.escape(name) + re_value = re.escape(str(value)) + re_line = re.compile(rf"DEBUG:git.cmd:Popen\(.*\b{re_name}={re_value}[,)]") + match_attempts = [re_line.match(message) for message in log_watcher.output] + self.assertTrue(any(match_attempts), repr(log_watcher.output)) + @mock.patch.object(Git, "execute") def test_call_process_calls_execute(self, git): git.return_value = "" @@ -62,58 +142,139 @@ class TestGit(TestBase): self.assertEqual(["--max-count=0"], self.git.transform_kwargs(**{"max_count": 0})) self.assertEqual([], self.git.transform_kwargs(**{"max_count": None})) - # Multiple args are supported by using lists/tuples + # Multiple args are supported by using lists/tuples. self.assertEqual( ["-L", "1-3", "-L", "12-18"], self.git.transform_kwargs(**{"L": ("1-3", "12-18")}), ) self.assertEqual(["-C", "-C"], self.git.transform_kwargs(**{"C": [True, True, None, False]})) - # order is undefined + # Order is undefined. 
res = self.git.transform_kwargs(**{"s": True, "t": True}) self.assertEqual({"-s", "-t"}, set(res)) - def test_it_executes_git_to_shell_and_returns_result(self): + _shell_cases = ( + # value_in_call, value_from_class, expected_popen_arg + (None, False, False), + (None, True, True), + (False, True, False), + (False, False, False), + (True, False, True), + (True, True, True), + ) + + def _do_shell_combo(self, value_in_call, value_from_class): + with mock.patch.object(Git, "USE_SHELL", value_from_class): + with mock.patch.object(cmd, "safer_popen", wraps=cmd.safer_popen) as mock_safer_popen: + # Use a command with no arguments (besides the program name), so it runs + # with or without a shell, on all OSes, with the same effect. + self.git.execute(["git"], with_exceptions=False, shell=value_in_call) + + return mock_safer_popen + + @ddt.idata(_shell_cases) + def test_it_uses_shell_or_not_as_specified(self, case): + """A bool passed as ``shell=`` takes precedence over `Git.USE_SHELL`.""" + value_in_call, value_from_class, expected_popen_arg = case + mock_safer_popen = self._do_shell_combo(value_in_call, value_from_class) + mock_safer_popen.assert_called_once() + self.assertIs(mock_safer_popen.call_args.kwargs["shell"], expected_popen_arg) + + @ddt.idata(full_case[:2] for full_case in _shell_cases) + def test_it_logs_if_it_uses_a_shell(self, case): + """``shell=`` in the log message agrees with what is passed to `Popen`.""" + value_in_call, value_from_class = case + with self.assertLogs(cmd.__name__, level=logging.DEBUG) as log_watcher: + mock_safer_popen = self._do_shell_combo(value_in_call, value_from_class) + self._assert_logged_for_popen(log_watcher, "shell", mock_safer_popen.call_args.kwargs["shell"]) + + @ddt.data( + ("None", None), + ("<valid stream>", subprocess.PIPE), + ) + def test_it_logs_istream_summary_for_stdin(self, case): + expected_summary, istream_argument = case + with self.assertLogs(cmd.__name__, level=logging.DEBUG) as log_watcher: + 
self.git.execute(["git", "version"], istream=istream_argument) + self._assert_logged_for_popen(log_watcher, "stdin", expected_summary) + + def test_it_executes_git_and_returns_result(self): self.assertRegex(self.git.execute(["git", "version"]), r"^git version [\d\.]{2}.*$") - def test_it_executes_git_not_from_cwd(self): - with TemporaryDirectory() as tmpdir: - if is_win: - # Copy an actual binary executable that is not git. - other_exe_path = os.path.join(os.getenv("WINDIR"), "system32", "hostname.exe") - impostor_path = os.path.join(tmpdir, "git.exe") - shutil.copy(other_exe_path, impostor_path) - else: - # Create a shell script that doesn't do anything. - impostor_path = os.path.join(tmpdir, "git") - with open(impostor_path, mode="w", encoding="utf-8") as file: - print("#!/bin/sh", file=file) - os.chmod(impostor_path, 0o755) - - with cwd(tmpdir): - self.assertRegex(self.git.execute(["git", "version"]), r"^git version\b") - - @skipUnless(is_win, "The regression only affected Windows, and this test logic is OS-specific.") + @ddt.data( + # chdir_to_repo, shell, command, use_shell_impostor + (False, False, ["git", "version"], False), + (False, True, "git version", False), + (False, True, "git version", True), + (True, False, ["git", "version"], False), + (True, True, "git version", False), + (True, True, "git version", True), + ) + @with_rw_directory + def test_it_executes_git_not_from_cwd(self, rw_dir, case): + chdir_to_repo, shell, command, use_shell_impostor = case + + repo = Repo.init(rw_dir) + + if sys.platform == "win32": + # Copy an actual binary executable that is not git. (On Windows, running + # "hostname" only displays the hostname, it never tries to change it.) + other_exe_path = Path(os.environ["SystemRoot"], "system32", "hostname.exe") + impostor_path = Path(rw_dir, "git.exe") + shutil.copy(other_exe_path, impostor_path) + else: + # Create a shell script that doesn't do anything. 
+ impostor_path = Path(rw_dir, "git") + impostor_path.write_text("#!/bin/sh\n", encoding="utf-8") + os.chmod(impostor_path, 0o755) + + if use_shell_impostor: + shell_name = "cmd.exe" if sys.platform == "win32" else "sh" + shutil.copy(impostor_path, Path(rw_dir, shell_name)) + + with contextlib.ExitStack() as stack: + if chdir_to_repo: + stack.enter_context(cwd(rw_dir)) + if use_shell_impostor: + stack.enter_context(_patch_out_env("ComSpec")) + + # Run the command without raising an exception on failure, as the exception + # message is currently misleading when the command is a string rather than a + # sequence of strings (it really runs "git", but then wrongly reports "g"). + output = repo.git.execute(command, with_exceptions=False, shell=shell) + + self.assertRegex(output, r"^git version\b") + + @skipUnless( + sys.platform == "win32", + "The regression only affected Windows, and this test logic is OS-specific.", + ) def test_it_avoids_upcasing_unrelated_environment_variable_names(self): old_name = "28f425ca_d5d8_4257_b013_8d63166c8158" if old_name == old_name.upper(): raise RuntimeError("test bug or strange locale: old_name invariant under upcasing") - # Step 1: Set the environment variable in this parent process. Because os.putenv is a thin - # wrapper around a system API, os.environ never sees the variable in this parent - # process, so the name is not upcased even on Windows. + # Step 1 + # + # Set the environment variable in this parent process. Because os.putenv is a + # thin wrapper around a system API, os.environ never sees the variable in this + # parent process, so the name is not upcased even on Windows. os.putenv(old_name, "1") - # Step 2: Create the child process that inherits the environment variable. The child uses - # GitPython, and we are testing that it passes the variable with the exact original - # name to its own child process (the grandchild). + # Step 2 + # + # Create the child process that inherits the environment variable. 
The child + # uses GitPython, and we are testing that it passes the variable with the exact + # original name to its own child process (the grandchild). cmdline = [ sys.executable, fixture_path("env_case.py"), # Contains steps 3 and 4. self.rorepo.working_dir, old_name, ] - pair_text = subprocess.check_output(cmdline, shell=False, text=True) # Run steps 3 and 4. + + # Run steps 3 and 4. + pair_text = subprocess.check_output(cmdline, shell=False, text=True) new_name = pair_text.split("=")[0] self.assertEqual(new_name, old_name) @@ -128,18 +289,14 @@ class TestGit(TestBase): @mock.patch.object(Git, "execute") def test_it_ignores_false_kwargs(self, git): - # this_should_not_be_ignored=False implies it *should* be ignored + # this_should_not_be_ignored=False implies it *should* be ignored. self.git.version(pass_this_kwarg=False) self.assertTrue("pass_this_kwarg" not in git.call_args[1]) def test_it_raises_proper_exception_with_output_stream(self): - tmp_file = TemporaryFile() - self.assertRaises( - GitCommandError, - self.git.checkout, - "non-existent-branch", - output_stream=tmp_file, - ) + with tempfile.TemporaryFile() as tmp_file: + with self.assertRaises(GitCommandError): + self.git.checkout("non-existent-branch", output_stream=tmp_file) def test_it_accepts_environment_variables(self): filename = fixture_path("ls_tree_empty") @@ -157,77 +314,396 @@ class TestGit(TestBase): self.assertEqual(commit, "4cfd6b0314682d5a58f80be39850bad1640e9241") def test_persistent_cat_file_command(self): - # read header only + # Read header only. hexsha = "b2339455342180c7cc1e9bba3e9f181f7baa5167" g = self.git.cat_file(batch_check=True, istream=subprocess.PIPE, as_process=True) g.stdin.write(b"b2339455342180c7cc1e9bba3e9f181f7baa5167\n") g.stdin.flush() obj_info = g.stdout.readline() - # read header + data + # Read header + data. 
g = self.git.cat_file(batch=True, istream=subprocess.PIPE, as_process=True) g.stdin.write(b"b2339455342180c7cc1e9bba3e9f181f7baa5167\n") g.stdin.flush() obj_info_two = g.stdout.readline() self.assertEqual(obj_info, obj_info_two) - # read data - have to read it in one large chunk + # Read data - have to read it in one large chunk. size = int(obj_info.split()[2]) g.stdout.read(size) g.stdout.read(1) - # now we should be able to read a new object + # Now we should be able to read a new object. g.stdin.write(b"b2339455342180c7cc1e9bba3e9f181f7baa5167\n") g.stdin.flush() self.assertEqual(g.stdout.readline(), obj_info) - # same can be achieved using the respective command functions + # Same can be achieved using the respective command functions. hexsha, typename, size = self.git.get_object_header(hexsha) hexsha, typename_two, size_two, _ = self.git.get_object_data(hexsha) self.assertEqual(typename, typename_two) self.assertEqual(size, size_two) - def test_version(self): + def test_version_info(self): + """The version_info attribute is a tuple of up to four ints.""" v = self.git.version_info self.assertIsInstance(v, tuple) + self.assertLessEqual(len(v), 4) + for n in v: + self.assertIsInstance(n, int) + + def test_version_info_pickleable(self): + """The version_info attribute is usable on unpickled Git instances.""" + deserialized = pickle.loads(pickle.dumps(self.git)) + v = deserialized.version_info + self.assertIsInstance(v, tuple) + self.assertLessEqual(len(v), 4) for n in v: self.assertIsInstance(n, int) - # END verify number types + + @ddt.data( + (("123", "456", "789"), (123, 456, 789)), + (("12", "34", "56", "78"), (12, 34, 56, 78)), + (("12", "34", "56", "78", "90"), (12, 34, 56, 78)), + (("1", "2", "a", "3"), (1, 2)), + (("1", "-2", "3"), (1,)), + (("1", "2a", "3"), (1,)), # Subject to change. 
+ ) + def test_version_info_is_leading_numbers(self, case): + fake_fields, expected_version_info = case + with _rollback_refresh(): + with _fake_git(*fake_fields) as path: + refresh(path) + new_git = Git() + self.assertEqual(new_git.version_info, expected_version_info) + + def test_git_exc_name_is_git(self): + self.assertEqual(self.git.git_exec_name, "git") def test_cmd_override(self): - with mock.patch.object( - type(self.git), - "GIT_PYTHON_GIT_EXECUTABLE", - osp.join("some", "path", "which", "doesn't", "exist", "gitbinary"), - ): - self.assertRaises(GitCommandNotFound, self.git.version) - - def test_refresh(self): - # test a bad git path refresh - self.assertRaises(GitCommandNotFound, refresh, "yada") - - # test a good path refresh - which_cmd = "where" if is_win else "command -v" - path = os.popen("{0} git".format(which_cmd)).read().strip().split("\n")[0] - refresh(path) + """Directly set bad GIT_PYTHON_GIT_EXECUTABLE causes git operations to raise.""" + bad_path = osp.join("some", "path", "which", "doesn't", "exist", "gitbinary") + with mock.patch.object(Git, "GIT_PYTHON_GIT_EXECUTABLE", bad_path): + with self.assertRaises(GitCommandNotFound) as ctx: + self.git.version() + self.assertEqual(ctx.exception.command, [bad_path, "version"]) + + @ddt.data(("0",), ("q",), ("quiet",), ("s",), ("silence",), ("silent",), ("n",), ("none",)) + def test_initial_refresh_from_bad_git_path_env_quiet(self, case): + """In "q" mode, bad initial path sets "git" and is quiet.""" + (mode,) = case + set_vars = { + "GIT_PYTHON_GIT_EXECUTABLE": str(Path("yada").absolute()), # Any bad path. + "GIT_PYTHON_REFRESH": mode, + } + with _rollback_refresh(): + Git.GIT_PYTHON_GIT_EXECUTABLE = None # Simulate startup. 
+ + with mock.patch.dict(os.environ, set_vars): + refresh() + self.assertEqual(self.git.GIT_PYTHON_GIT_EXECUTABLE, "git") + + @ddt.data(("1",), ("w",), ("warn",), ("warning",), ("l",), ("log",)) + def test_initial_refresh_from_bad_git_path_env_warn(self, case): + """In "w" mode, bad initial path sets "git" and warns, by logging.""" + (mode,) = case + env_vars = { + "GIT_PYTHON_GIT_EXECUTABLE": str(Path("yada").absolute()), # Any bad path. + "GIT_PYTHON_REFRESH": mode, + } + with _rollback_refresh(): + Git.GIT_PYTHON_GIT_EXECUTABLE = None # Simulate startup. + + with mock.patch.dict(os.environ, env_vars): + with self.assertLogs(cmd.__name__, logging.CRITICAL) as ctx: + refresh() + self.assertEqual(len(ctx.records), 1) + message = ctx.records[0].getMessage() + self.assertRegex(message, r"\ABad git executable.\n") + self.assertEqual(self.git.GIT_PYTHON_GIT_EXECUTABLE, "git") + + @ddt.data(("2",), ("r",), ("raise",), ("e",), ("error",)) + def test_initial_refresh_from_bad_git_path_env_error(self, case): + """In "e" mode, bad initial path raises an exception.""" + (mode,) = case + env_vars = { + "GIT_PYTHON_GIT_EXECUTABLE": str(Path("yada").absolute()), # Any bad path. + "GIT_PYTHON_REFRESH": mode, + } + with _rollback_refresh(): + Git.GIT_PYTHON_GIT_EXECUTABLE = None # Simulate startup. + + with mock.patch.dict(os.environ, env_vars): + with self.assertRaisesRegex(ImportError, r"\ABad git executable.\n"): + refresh() + + def test_initial_refresh_from_good_absolute_git_path_env(self): + """Good initial absolute path from environment is set.""" + absolute_path = shutil.which("git") + + with _rollback_refresh(): + Git.GIT_PYTHON_GIT_EXECUTABLE = None # Simulate startup. 
+ + with mock.patch.dict(os.environ, {"GIT_PYTHON_GIT_EXECUTABLE": absolute_path}): + refresh() + self.assertEqual(self.git.GIT_PYTHON_GIT_EXECUTABLE, absolute_path) + + def test_initial_refresh_from_good_relative_git_path_env(self): + """Good initial relative path from environment is kept relative and set.""" + with _rollback_refresh(): + # Set the fallback to a string that wouldn't work and isn't "git", so we are + # more likely to detect if "git" is not set from the environment variable. + with mock.patch.object(Git, "git_exec_name", ""): + Git.GIT_PYTHON_GIT_EXECUTABLE = None # Simulate startup. + + # Now observe if setting the environment variable to "git" takes effect. + with mock.patch.dict(os.environ, {"GIT_PYTHON_GIT_EXECUTABLE": "git"}): + refresh() + self.assertEqual(self.git.GIT_PYTHON_GIT_EXECUTABLE, "git") + + def test_refresh_from_bad_absolute_git_path_env(self): + """Bad absolute path from environment is reported and not set.""" + absolute_path = str(Path("yada").absolute()) + expected_pattern = rf"\n[ \t]*cmdline: {re.escape(absolute_path)}\Z" + + with _rollback_refresh() as old_git_executable: + with mock.patch.dict(os.environ, {"GIT_PYTHON_GIT_EXECUTABLE": absolute_path}): + with self.assertRaisesRegex(GitCommandNotFound, expected_pattern): + refresh() + self.assertEqual(self.git.GIT_PYTHON_GIT_EXECUTABLE, old_git_executable) + + def test_refresh_from_bad_relative_git_path_env(self): + """Bad relative path from environment is kept relative and reported, not set.""" + # Relative paths are not resolved when refresh() is called with no arguments, so + # use a string that's very unlikely to be a command name found in a path lookup. 
+ relative_path = "yada-e47e70c6-acbf-40f8-ad65-13af93c2195b" + expected_pattern = rf"\n[ \t]*cmdline: {re.escape(relative_path)}\Z" + + with _rollback_refresh() as old_git_executable: + with mock.patch.dict(os.environ, {"GIT_PYTHON_GIT_EXECUTABLE": relative_path}): + with self.assertRaisesRegex(GitCommandNotFound, expected_pattern): + refresh() + self.assertEqual(self.git.GIT_PYTHON_GIT_EXECUTABLE, old_git_executable) + + def test_refresh_from_good_absolute_git_path_env(self): + """Good absolute path from environment is set.""" + absolute_path = shutil.which("git") + + with _rollback_refresh(): + with mock.patch.dict(os.environ, {"GIT_PYTHON_GIT_EXECUTABLE": absolute_path}): + refresh() + self.assertEqual(self.git.GIT_PYTHON_GIT_EXECUTABLE, absolute_path) + + def test_refresh_from_good_relative_git_path_env(self): + """Good relative path from environment is kept relative and set.""" + with _rollback_refresh(): + # Set as the executable name a string that wouldn't work and isn't "git". + Git.GIT_PYTHON_GIT_EXECUTABLE = "" + + # Now observe if setting the environment variable to "git" takes effect. 
+ with mock.patch.dict(os.environ, {"GIT_PYTHON_GIT_EXECUTABLE": "git"}): + refresh() + self.assertEqual(self.git.GIT_PYTHON_GIT_EXECUTABLE, "git") + + def test_refresh_with_bad_absolute_git_path_arg(self): + """Bad absolute path arg is reported and not set.""" + absolute_path = str(Path("yada").absolute()) + expected_pattern = rf"\n[ \t]*cmdline: {re.escape(absolute_path)}\Z" + + with _rollback_refresh() as old_git_executable: + with self.assertRaisesRegex(GitCommandNotFound, expected_pattern): + refresh(absolute_path) + self.assertEqual(self.git.GIT_PYTHON_GIT_EXECUTABLE, old_git_executable) + + def test_refresh_with_bad_relative_git_path_arg(self): + """Bad relative path arg is resolved to absolute path and reported, not set.""" + absolute_path = str(Path("yada").absolute()) + expected_pattern = rf"\n[ \t]*cmdline: {re.escape(absolute_path)}\Z" + + with _rollback_refresh() as old_git_executable: + with self.assertRaisesRegex(GitCommandNotFound, expected_pattern): + refresh("yada") + self.assertEqual(self.git.GIT_PYTHON_GIT_EXECUTABLE, old_git_executable) + + def test_refresh_with_good_absolute_git_path_arg(self): + """Good absolute path arg is set.""" + absolute_path = shutil.which("git") + + with _rollback_refresh(): + refresh(absolute_path) + self.assertEqual(self.git.GIT_PYTHON_GIT_EXECUTABLE, absolute_path) + + def test_refresh_with_good_relative_git_path_arg(self): + """Good relative path arg is resolved to absolute path and set.""" + absolute_path = shutil.which("git") + dirname, basename = osp.split(absolute_path) + + with cwd(dirname): + with _rollback_refresh(): + refresh(basename) + self.assertEqual(self.git.GIT_PYTHON_GIT_EXECUTABLE, absolute_path) + + def test_version_info_is_cached(self): + fake_version_info = (123, 456, 789) + with _rollback_refresh(): + with _fake_git(*fake_version_info) as path: + new_git = Git() # Not cached yet. 
+ refresh(path) + self.assertEqual(new_git.version_info, fake_version_info) + os.remove(path) # Arrange that a second subprocess call would fail. + self.assertEqual(new_git.version_info, fake_version_info) + + def test_version_info_cache_is_per_instance(self): + with _rollback_refresh(): + with _fake_git(123, 456, 789) as path: + git1 = Git() + git2 = Git() + refresh(path) + git1.version_info + os.remove(path) # Arrange that the second subprocess call will fail. + with self.assertRaises(GitCommandNotFound): + git2.version_info + git1.version_info + + def test_version_info_cache_is_not_pickled(self): + with _rollback_refresh(): + with _fake_git(123, 456, 789) as path: + git1 = Git() + refresh(path) + git1.version_info + git2 = pickle.loads(pickle.dumps(git1)) + os.remove(path) # Arrange that the second subprocess call will fail. + with self.assertRaises(GitCommandNotFound): + git2.version_info + git1.version_info + + def test_successful_refresh_with_arg_invalidates_cached_version_info(self): + with _rollback_refresh(): + with _fake_git(11, 111, 1) as path1: + with _fake_git(22, 222, 2) as path2: + new_git = Git() + refresh(path1) + new_git.version_info + refresh(path2) + self.assertEqual(new_git.version_info, (22, 222, 2)) + + def test_failed_refresh_with_arg_does_not_invalidate_cached_version_info(self): + with _rollback_refresh(): + with _fake_git(11, 111, 1) as path1: + with _fake_git(22, 222, 2) as path2: + new_git = Git() + refresh(path1) + new_git.version_info + os.remove(path1) # Arrange that a repeat call for path1 would fail. + os.remove(path2) # Arrange that the new call for path2 will fail. 
+ with self.assertRaises(GitCommandNotFound): + refresh(path2) + self.assertEqual(new_git.version_info, (11, 111, 1)) + + def test_successful_refresh_with_same_arg_invalidates_cached_version_info(self): + """Changing git at the same path and refreshing affects version_info.""" + with _rollback_refresh(): + with _fake_git(11, 111, 1) as path1: + with _fake_git(22, 222, 2) as path2: + new_git = Git() + refresh(path1) + new_git.version_info + shutil.copy(path2, path1) + refresh(path1) # The fake git at path1 has a different version now. + self.assertEqual(new_git.version_info, (22, 222, 2)) + + def test_successful_refresh_with_env_invalidates_cached_version_info(self): + with contextlib.ExitStack() as stack: + stack.enter_context(_rollback_refresh()) + path1 = stack.enter_context(_fake_git(11, 111, 1)) + path2 = stack.enter_context(_fake_git(22, 222, 2)) + with mock.patch.dict(os.environ, {"GIT_PYTHON_GIT_EXECUTABLE": path1}): + new_git = Git() + refresh() + new_git.version_info + with mock.patch.dict(os.environ, {"GIT_PYTHON_GIT_EXECUTABLE": path2}): + refresh() + self.assertEqual(new_git.version_info, (22, 222, 2)) + + def test_failed_refresh_with_env_does_not_invalidate_cached_version_info(self): + with contextlib.ExitStack() as stack: + stack.enter_context(_rollback_refresh()) + path1 = stack.enter_context(_fake_git(11, 111, 1)) + path2 = stack.enter_context(_fake_git(22, 222, 2)) + with mock.patch.dict(os.environ, {"GIT_PYTHON_GIT_EXECUTABLE": path1}): + new_git = Git() + refresh() + new_git.version_info + os.remove(path1) # Arrange that a repeat call for path1 would fail. + os.remove(path2) # Arrange that the new call for path2 will fail. 
+ with mock.patch.dict(os.environ, {"GIT_PYTHON_GIT_EXECUTABLE": path2}): + with self.assertRaises(GitCommandNotFound): + refresh(path2) + self.assertEqual(new_git.version_info, (11, 111, 1)) + + def test_successful_refresh_with_same_env_invalidates_cached_version_info(self): + """Changing git at the same path/command and refreshing affects version_info.""" + with contextlib.ExitStack() as stack: + stack.enter_context(_rollback_refresh()) + path1 = stack.enter_context(_fake_git(11, 111, 1)) + path2 = stack.enter_context(_fake_git(22, 222, 2)) + with mock.patch.dict(os.environ, {"GIT_PYTHON_GIT_EXECUTABLE": path1}): + new_git = Git() + refresh() + new_git.version_info + shutil.copy(path2, path1) + refresh() # The fake git at path1 has a different version now. + self.assertEqual(new_git.version_info, (22, 222, 2)) + + def test_successful_default_refresh_invalidates_cached_version_info(self): + """Refreshing updates version after a filesystem change adds a git command.""" + # The key assertion here is the last. The others mainly verify the test itself. + with contextlib.ExitStack() as stack: + stack.enter_context(_rollback_refresh()) + + path1 = Path(stack.enter_context(_fake_git(11, 111, 1))) + path2 = Path(stack.enter_context(_fake_git(22, 222, 2))) + + new_path_var = f"{path1.parent}{os.pathsep}{path2.parent}" + stack.enter_context(mock.patch.dict(os.environ, {"PATH": new_path_var})) + stack.enter_context(_patch_out_env("GIT_PYTHON_GIT_EXECUTABLE")) + + if sys.platform == "win32": + # On Windows, use a shell so "git" finds "git.cmd". The correct and safe + # ways to do this straightforwardly are to set GIT_PYTHON_GIT_EXECUTABLE + # to git.cmd in the environment, or call git.refresh with the command's + # full path. See the Git.USE_SHELL docstring for deprecation details. + # But this tests a "default" scenario where neither is done. 
The + # approach used here, setting USE_SHELL to True so PATHEXT is honored, + # should not be used in production code (nor even in most test cases). + stack.enter_context(mock.patch.object(Git, "USE_SHELL", True)) + + new_git = Git() + _rename_with_stem(path2, "git") # "Install" git, "late" in the PATH. + refresh() + self.assertEqual(new_git.version_info, (22, 222, 2), 'before "downgrade"') + _rename_with_stem(path1, "git") # "Install" another, higher priority. + self.assertEqual(new_git.version_info, (22, 222, 2), "stale version") + refresh() + self.assertEqual(new_git.version_info, (11, 111, 1), "fresh version") def test_options_are_passed_to_git(self): - # This work because any command after git --version is ignored + # This works because any command after git --version is ignored. git_version = self.git(version=True).NoOp() git_command_version = self.git.version() self.assertEqual(git_version, git_command_version) def test_persistent_options(self): git_command_version = self.git.version() - # analog to test_options_are_passed_to_git + # Analog to test_options_are_passed_to_git. self.git.set_persistent_git_options(version=True) git_version = self.git.NoOp() self.assertEqual(git_version, git_command_version) - # subsequent calls keep this option: + # Subsequent calls keep this option: git_version_2 = self.git.NoOp() self.assertEqual(git_version_2, git_command_version) - # reset to empty: + # Reset to empty: self.git.set_persistent_git_options() self.assertRaises(GitCommandError, self.git.NoOp) @@ -240,7 +716,7 @@ class TestGit(TestBase): self.git.log(n=1) def test_insert_after_kwarg_raises(self): - # This isn't a complete add command, which doesn't matter here + # This isn't a complete add command, which doesn't matter here. 
self.assertRaises(ValueError, self.git.remote, "add", insert_kwargs_after="foo") def test_env_vars_passed_to_git(self): @@ -250,10 +726,10 @@ class TestGit(TestBase): @with_rw_directory def test_environment(self, rw_dir): - # sanity check + # Sanity check. self.assertEqual(self.git.environment(), {}) - # make sure the context manager works and cleans up after itself + # Make sure the context manager works and cleans up after itself. with self.git.custom_environment(PWD="/tmp"): self.assertEqual(self.git.environment(), {"PWD": "/tmp"}) @@ -284,7 +760,7 @@ class TestGit(TestBase): self.assertIn("FOO", str(err)) def test_handle_process_output(self): - from git.cmd import handle_process_output + from git.cmd import handle_process_output, safer_popen line_count = 5002 count = [None, 0, 0] @@ -300,16 +776,23 @@ class TestGit(TestBase): fixture_path("cat_file.py"), str(fixture_path("issue-301_stderr")), ] - proc = subprocess.Popen( + proc = safer_popen( cmdline, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=False, - creationflags=cmd.PROC_CREATIONFLAGS, ) handle_process_output(proc, counter_stdout, counter_stderr, finalize_process) self.assertEqual(count[1], line_count) self.assertEqual(count[2], line_count) + + def test_execute_kwargs_set_agrees_with_method(self): + parameter_names = inspect.signature(cmd.Git.execute).parameters.keys() + self_param, command_param, *most_params, extra_kwargs_param = parameter_names + self.assertEqual(self_param, "self") + self.assertEqual(command_param, "command") + self.assertEqual(set(most_params), cmd.execute_kwargs) # Most important. 
+ self.assertEqual(extra_kwargs_param, "subprocess_kwargs") diff --git a/test/test_imports.py b/test/test_imports.py new file mode 100644 index 0000000..8e70c66 --- /dev/null +++ b/test/test_imports.py @@ -0,0 +1,32 @@ +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ + +import sys + +import git + + +def test_git_util_attribute_is_git_index_util(): + """The top-level module's ``util`` attribute is really :mod:`git.index.util`. + + Although this situation is unintuitive and not a design goal, this has historically + been the case, and it should not be changed without considering the effect on + backward compatibility. In practice, it cannot be changed at least until the next + major version of GitPython. This test checks that it is not accidentally changed, + which could happen when refactoring imports. + """ + assert git.util is git.index.util + + +def test_git_index_util_attribute_is_git_index_util(): + """Nothing unusual is happening with git.index.util itself.""" + assert git.index.util is sys.modules["git.index.util"] + + +def test_separate_git_util_module_exists(): + """The real git.util and git.index.util modules really are separate. + + The real git.util module can be accessed to import a name ``...`` by writing + ``from git.util import ...``, and the module object can be accessed in sys.modules. 
+ """ + assert sys.modules["git.util"] is not sys.modules["git.index.util"] diff --git a/test/test_index.py b/test/test_index.py index fba9c78..c586a0b 100644 --- a/test/test_index.py +++ b/test/test_index.py @@ -1,47 +1,181 @@ -# -*- coding: utf-8 -*- -# test_index.py # Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors # -# This module is part of GitPython and is released under -# the BSD License: https://opensource.org/license/bsd-3-clause/ +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ +import contextlib +from dataclasses import dataclass from io import BytesIO +import logging import os +import os.path as osp +from pathlib import Path +import re +import shutil from stat import S_ISLNK, ST_MODE +import subprocess +import sys import tempfile -from unittest import skipIf -import shutil -from git import ( - IndexFile, - Repo, - BlobFilter, - UnmergedEntriesError, - Tree, - Object, - Diff, - GitCommandError, +from gitdb.base import IStream + +import ddt +import pytest + +from git import BlobFilter, Diff, Git, IndexFile, Object, Repo, Tree +from git.exc import ( CheckoutError, + GitCommandError, + HookExecutionError, + InvalidGitRepositoryError, + UnmergedEntriesError, ) -from git.compat import is_win -from git.exc import HookExecutionError, InvalidGitRepositoryError -from git.index.fun import hook_path +from git.index.fun import hook_path, run_commit_hook from git.index.typ import BaseIndexEntry, IndexEntry +from git.index.util import TemporaryFileSwap from git.objects import Blob -from test.lib import TestBase, fixture_path, fixture, with_rw_repo -from test.lib import with_rw_directory -from git.util import Actor, rmtree -from git.util import HIDE_WINDOWS_KNOWN_ERRORS, hex_to_bin -from gitdb.base import IStream +from git.util import Actor, cwd, hex_to_bin, rmtree + +from test.lib import ( + TestBase, + VirtualEnvironment, + fixture, + fixture_path, + 
with_rw_directory, + with_rw_repo, +) -import os.path as osp -from git.cmd import Git +HOOKS_SHEBANG = "#!/usr/bin/env sh\n" -from pathlib import Path +_logger = logging.getLogger(__name__) -HOOKS_SHEBANG = "#!/usr/bin/env sh\n" -is_win_without_bash = is_win and not shutil.which("bash.exe") +def _get_windows_ansi_encoding(): + """Get the encoding specified by the Windows system-wide ANSI active code page.""" + # locale.getencoding may work but is only in Python 3.11+. Use the registry instead. + import winreg + + hklm_path = R"SYSTEM\CurrentControlSet\Control\Nls\CodePage" + with winreg.OpenKey(winreg.HKEY_LOCAL_MACHINE, hklm_path) as key: + value, _ = winreg.QueryValueEx(key, "ACP") + return f"cp{value}" + + +class WinBashStatus: + """Namespace of native-Windows bash.exe statuses. Affects what hook tests can pass. + + Call check() to check the status. (CheckError and WinError should not typically be + used to trigger skip or xfail, because they represent unexpected situations.) + """ + + @dataclass + class Inapplicable: + """This system is not native Windows: either not Windows at all, or Cygwin.""" + + @dataclass + class Absent: + """No command for bash.exe is found on the system.""" + + @dataclass + class Native: + """Running bash.exe operates outside any WSL distribution (as with Git Bash).""" + + @dataclass + class Wsl: + """Running bash.exe calls bash in a WSL distribution.""" + + @dataclass + class WslNoDistro: + """Running bash.exe tries to run bash on a WSL distribution, but none exists.""" + + process: "subprocess.CompletedProcess[bytes]" + message: str + + @dataclass + class CheckError: + """Running bash.exe fails in an unexpected error or gives unexpected output.""" + + process: "subprocess.CompletedProcess[bytes]" + message: str + + @dataclass + class WinError: + """bash.exe may exist but can't run. 
CreateProcessW fails unexpectedly.""" + + exception: OSError + + @classmethod + def check(cls): + """Check the status of the bash.exe that run_commit_hook will try to use. + + This runs a command with bash.exe and checks the result. On Windows, shell and + non-shell executable search differ; shutil.which often finds the wrong bash.exe. + + run_commit_hook uses Popen, including to run bash.exe on Windows. It doesn't + pass shell=True (and shouldn't). On Windows, Popen calls CreateProcessW, which + checks some locations before using the PATH environment variable. It is expected + to try System32, even if another directory with the executable precedes it in + PATH. When WSL is present, even with no distributions, bash.exe usually exists + in System32; Popen finds it even if a shell would run another one, as on CI. + (Without WSL, System32 may still have bash.exe; users sometimes put it there.) + """ + if sys.platform != "win32": + return cls.Inapplicable() + + try: + # Output rather than forwarding the test command's exit status so that if a + # failure occurs before we even get to this point, we will detect it. For + # information on ways to check for WSL, see https://superuser.com/a/1749811. + script = 'test -e /proc/sys/fs/binfmt_misc/WSLInterop; echo "$?"' + command = ["bash.exe", "-c", script] + process = subprocess.run(command, capture_output=True) + except FileNotFoundError: + return cls.Absent() + except OSError as error: + return cls.WinError(error) + + text = cls._decode(process.stdout).rstrip() # stdout includes WSL's own errors. 
+ + if process.returncode == 1 and re.search(r"\bhttps://aka.ms/wslstore\b", text): + return cls.WslNoDistro(process, text) + if process.returncode != 0: + _logger.error("Error running bash.exe to check WSL status: %s", text) + return cls.CheckError(process, text) + if text == "0": + return cls.Wsl() + if text == "1": + return cls.Native() + _logger.error("Strange output checking WSL status: %s", text) + return cls.CheckError(process, text) + + @staticmethod + def _decode(stdout): + """Decode bash.exe output as best we can.""" + # When bash.exe is the WSL wrapper but the output is from WSL itself rather than + # code running in a distribution, the output is often in UTF-16LE, which Windows + # uses internally. The UTF-16LE representation of a Windows-style line ending is + # rarely seen otherwise, so use it to detect this situation. + if b"\r\0\n\0" in stdout: + return stdout.decode("utf-16le") + + # At this point, the output is either blank or probably not UTF-16LE. It's often + # UTF-8 from inside a WSL distro or non-WSL bash shell. Our test command only + # uses the ASCII subset, so we can safely guess a wrong code page for it. Errors + # from such an environment can contain any text, but unlike WSL's own messages, + # they go to stderr, not stdout. So we can try the system ANSI code page first. + acp = _get_windows_ansi_encoding() + try: + return stdout.decode(acp) + except UnicodeDecodeError: + pass + except LookupError as error: + _logger.warning(str(error)) # Message already says "Unknown encoding:". + + # Assume UTF-8. If invalid, substitute Unicode replacement characters. 
+ return stdout.decode("utf-8", errors="replace") + + +_win_bash_status = WinBashStatus.check() def _make_hook(git_dir, name, content, make_exec=True): @@ -57,9 +191,10 @@ def _make_hook(git_dir, name, content, make_exec=True): return hp +@ddt.ddt class TestIndex(TestBase): def __init__(self, *args): - super(TestIndex, self).__init__(*args) + super().__init__(*args) self._reset_progress() def _assert_fprogress(self, entries): @@ -79,13 +214,12 @@ class TestIndex(TestBase): self._fprogress_map[path] = curval + 1 def _fprogress_add(self, path, done, item): - """Called as progress func - we keep track of the proper - call order""" + """Called as progress func - we keep track of the proper call order.""" assert item is not None self._fprogress(path, done, item) def _reset_progress(self): - # maps paths to the count of calls + # Maps paths to the count of calls. self._fprogress_map = {} def _assert_entries(self, entries): @@ -96,12 +230,12 @@ class TestIndex(TestBase): # END for each entry def test_index_file_base(self): - # read from file + # Read from file. index = IndexFile(self.rorepo, fixture_path("index")) assert index.entries assert index.version > 0 - # test entry + # Test entry. entry = next(iter(index.entries.values())) for attr in ( "path", @@ -120,17 +254,17 @@ class TestIndex(TestBase): getattr(entry, attr) # END for each method - # test update + # Test update. entries = index.entries assert isinstance(index.update(), IndexFile) assert entries is not index.entries - # test stage + # Test stage. index_merge = IndexFile(self.rorepo, fixture_path("index_merge")) self.assertEqual(len(index_merge.entries), 106) assert len([e for e in index_merge.entries.values() if e.stage != 0]) - # write the data - it must match the original + # Write the data - it must match the original. 
tmpfile = tempfile.mktemp() index_merge.write(tmpfile) with open(tmpfile, "rb") as fp: @@ -138,7 +272,7 @@ class TestIndex(TestBase): os.remove(tmpfile) def _cmp_tree_index(self, tree, index): - # fail unless both objects contain the same paths and blobs + # Fail unless both objects contain the same paths and blobs. if isinstance(tree, str): tree = self.rorepo.commit(tree).tree @@ -161,14 +295,14 @@ class TestIndex(TestBase): rw_repo.index.add([Blob(rw_repo, b"f" * 20, "bad-permissions", "foo")]) try: - ## 1st fail on purpose adding into index. + ## First, fail on purpose adding into index. add_bad_blob() except Exception as ex: msg_py3 = "required argument is not an integer" msg_py2 = "cannot convert argument to integer" assert msg_py2 in str(ex) or msg_py3 in str(ex) - ## 2nd time should not fail due to stray lock file + ## The second time should not fail due to stray lock file. try: add_bad_blob() except Exception as ex: @@ -180,17 +314,17 @@ class TestIndex(TestBase): cur_sha = "4b43ca7ff72d5f535134241e7c797ddc9c7a3573" other_sha = "39f85c4358b7346fee22169da9cad93901ea9eb9" - # simple index from tree + # Simple index from tree. base_index = IndexFile.from_tree(rw_repo, common_ancestor_sha) assert base_index.entries self._cmp_tree_index(common_ancestor_sha, base_index) - # merge two trees - its like a fast-forward + # Merge two trees - it's like a fast-forward. two_way_index = IndexFile.from_tree(rw_repo, common_ancestor_sha, cur_sha) assert two_way_index.entries self._cmp_tree_index(cur_sha, two_way_index) - # merge three trees - here we have a merge conflict + # Merge three trees - here we have a merge conflict. three_way_index = IndexFile.from_tree(rw_repo, common_ancestor_sha, cur_sha, other_sha) assert len([e for e in three_way_index.entries.values() if e.stage != 0]) @@ -201,19 +335,19 @@ class TestIndex(TestBase): assert merge_blobs[0][0] in (1, 2, 3) assert isinstance(merge_blobs[0][1], Blob) - # test BlobFilter + # Test BlobFilter. 
prefix = "lib/git" for _stage, blob in base_index.iter_blobs(BlobFilter([prefix])): assert blob.path.startswith(prefix) - # writing a tree should fail with an unmerged index + # Writing a tree should fail with an unmerged index. self.assertRaises(UnmergedEntriesError, three_way_index.write_tree) - # removed unmerged entries + # Removed unmerged entries. unmerged_blob_map = three_way_index.unmerged_blobs() assert unmerged_blob_map - # pick the first blob at the first stage we find and use it as resolved version + # Pick the first blob at the first stage we find and use it as resolved version. three_way_index.resolve_blobs(line[0][1] for line in unmerged_blob_map.values()) tree = three_way_index.write_tree() assert isinstance(tree, Tree) @@ -231,13 +365,13 @@ class TestIndex(TestBase): self.assertEqual(len({self.rorepo, self.rorepo, rw_repo, rw_repo}), 2) # SINGLE TREE MERGE - # current index is at the (virtual) cur_commit + # Current index is at the (virtual) cur_commit. next_commit = "4c39f9da792792d4e73fc3a5effde66576ae128c" parent_commit = rw_repo.head.commit.parents[0] manifest_key = IndexFile.entry_key("MANIFEST.in", 0) manifest_entry = rw_repo.index.entries[manifest_key] rw_repo.index.merge_tree(next_commit) - # only one change should be recorded + # Only one change should be recorded. assert manifest_entry.binsha != rw_repo.index.entries[manifest_key].binsha rw_repo.index.reset(rw_repo.head) @@ -245,42 +379,43 @@ class TestIndex(TestBase): # FAKE MERGE ############# - # Add a change with a NULL sha that should conflict with next_commit. We - # pretend there was a change, but we do not even bother adding a proper - # sha for it ( which makes things faster of course ) + # Add a change with a NULL sha that should conflict with next_commit. We pretend + # there was a change, but we do not even bother adding a proper sha for it + # (which makes things faster of course). 
manifest_fake_entry = BaseIndexEntry((manifest_entry[0], b"\0" * 20, 0, manifest_entry[3])) - # try write flag + # Try write flag. self._assert_entries(rw_repo.index.add([manifest_fake_entry], write=False)) - # add actually resolves the null-hex-sha for us as a feature, but we can - # edit the index manually + # Add actually resolves the null-hex-sha for us as a feature, but we can edit + # the index manually. assert rw_repo.index.entries[manifest_key].binsha != Object.NULL_BIN_SHA - # must operate on the same index for this ! Its a bit problematic as - # it might confuse people + # We must operate on the same index for this! It's a bit problematic as it might + # confuse people. index = rw_repo.index index.entries[manifest_key] = IndexEntry.from_base(manifest_fake_entry) index.write() self.assertEqual(rw_repo.index.entries[manifest_key].hexsha, Diff.NULL_HEX_SHA) - # write an unchanged index ( just for the fun of it ) + # Write an unchanged index (just for the fun of it). rw_repo.index.write() - # a three way merge would result in a conflict and fails as the command will - # not overwrite any entries in our index and hence leave them unmerged. This is - # mainly a protection feature as the current index is not yet in a tree + # A three way merge would result in a conflict and fails as the command will not + # overwrite any entries in our index and hence leave them unmerged. This is + # mainly a protection feature as the current index is not yet in a tree. self.assertRaises(GitCommandError, index.merge_tree, next_commit, base=parent_commit) - # the only way to get the merged entries is to safe the current index away into a tree, - # which is like a temporary commit for us. This fails as well as the NULL sha deos not - # have a corresponding object - # NOTE: missing_ok is not a kwarg anymore, missing_ok is always true + # The only way to get the merged entries is to save the current index away into 
This fails as well as the + # NULL sha does not have a corresponding object. + # NOTE: missing_ok is not a kwarg anymore, missing_ok is always true. # self.assertRaises(GitCommandError, index.write_tree) - # if missing objects are okay, this would work though ( they are always okay now ) - # As we can't read back the tree with NULL_SHA, we rather set it to something else + # If missing objects are okay, this would work though (they are always okay + # now). As we can't read back the tree with NULL_SHA, we rather set it to + # something else. index.entries[manifest_key] = IndexEntry(manifest_entry[:1] + (hex_to_bin("f" * 40),) + manifest_entry[2:]) tree = index.write_tree() - # now make a proper three way merge with unmerged entries + # Now make a proper three way merge with unmerged entries. unmerged_tree = IndexFile.from_tree(rw_repo, parent_commit, tree, next_commit) unmerged_blobs = unmerged_tree.unmerged_blobs() self.assertEqual(len(unmerged_blobs), 1) @@ -288,49 +423,49 @@ class TestIndex(TestBase): @with_rw_repo("0.1.6") def test_index_file_diffing(self, rw_repo): - # default Index instance points to our index + # Default IndexFile instance points to our index. index = IndexFile(rw_repo) assert index.path is not None assert len(index.entries) - # write the file back + # Write the file back. index.write() - # could sha it, or check stats + # Could sha it, or check stats. - # test diff - # resetting the head will leave the index in a different state, and the - # diff will yield a few changes + # Test diff. + # Resetting the head will leave the index in a different state, and the diff + # will yield a few changes. cur_head_commit = rw_repo.head.reference.commit rw_repo.head.reset("HEAD~6", index=True, working_tree=False) - # diff against same index is 0 + # Diff against same index is 0. 
diff = index.diff() self.assertEqual(len(diff), 0) - # against HEAD as string, must be the same as it matches index + # Against HEAD as string, must be the same as it matches index. diff = index.diff("HEAD") self.assertEqual(len(diff), 0) - # against previous head, there must be a difference + # Against previous head, there must be a difference. diff = index.diff(cur_head_commit) assert len(diff) - # we reverse the result + # We reverse the result. adiff = index.diff(str(cur_head_commit), R=True) - odiff = index.diff(cur_head_commit, R=False) # now its not reversed anymore + odiff = index.diff(cur_head_commit, R=False) # Now its not reversed anymore. assert adiff != odiff - self.assertEqual(odiff, diff) # both unreversed diffs against HEAD + self.assertEqual(odiff, diff) # Both unreversed diffs against HEAD. - # against working copy - its still at cur_commit + # Against working copy - it's still at cur_commit. wdiff = index.diff(None) assert wdiff != adiff assert wdiff != odiff - # against something unusual + # Against something unusual. self.assertRaises(ValueError, index.diff, int) - # adjust the index to match an old revision + # Adjust the index to match an old revision. cur_branch = rw_repo.active_branch cur_commit = cur_branch.commit rev_head_parent = "HEAD~1" @@ -339,10 +474,10 @@ class TestIndex(TestBase): self.assertEqual(cur_branch, rw_repo.active_branch) self.assertEqual(cur_commit, rw_repo.head.commit) - # there must be differences towards the working tree which is in the 'future' + # There must be differences towards the working tree which is in the 'future'. assert index.diff(None) - # reset the working copy as well to current head,to pull 'back' as well + # Reset the working copy as well to current head, to pull 'back' as well. 
new_data = b"will be reverted" file_path = osp.join(rw_repo.working_tree_dir, "CHANGES") with open(file_path, "wb") as fp: @@ -354,7 +489,7 @@ class TestIndex(TestBase): with open(file_path, "rb") as fp: assert fp.read() != new_data - # test full checkout + # Test full checkout. test_file = osp.join(rw_repo.working_tree_dir, "CHANGES") with open(test_file, "ab") as fd: fd.write(b"some data") @@ -369,26 +504,26 @@ class TestIndex(TestBase): self._assert_fprogress([None]) assert osp.isfile(test_file) - # individual file + # Individual file. os.remove(test_file) rval = index.checkout(test_file, fprogress=self._fprogress) self.assertEqual(list(rval)[0], "CHANGES") self._assert_fprogress([test_file]) assert osp.exists(test_file) - # checking out non-existing file throws + # Checking out non-existing file throws. self.assertRaises(CheckoutError, index.checkout, "doesnt_exist_ever.txt.that") self.assertRaises(CheckoutError, index.checkout, paths=["doesnt/exist"]) - # checkout file with modifications + # Check out file with modifications. append_data = b"hello" with open(test_file, "ab") as fp: fp.write(append_data) try: index.checkout(test_file) except CheckoutError as e: - # detailed exceptions are only possible in older git versions - if rw_repo.git._version_info < (2, 29): + # Detailed exceptions are only possible in older git versions. + if rw_repo.git.version_info < (2, 29): self.assertEqual(len(e.failed_files), 1) self.assertEqual(e.failed_files[0], osp.basename(test_file)) self.assertEqual(len(e.failed_files), len(e.failed_reasons)) @@ -400,19 +535,17 @@ class TestIndex(TestBase): else: raise AssertionError("Exception CheckoutError not thrown") - # if we force it it should work + # If we force it, it should work. index.checkout(test_file, force=True) assert not open(test_file, "rb").read().endswith(append_data) - # checkout directory + # Check out directory. 
rmtree(osp.join(rw_repo.working_tree_dir, "lib")) rval = index.checkout("lib") assert len(list(rval)) > 1 def _count_existing(self, repo, files): - """ - Returns count of files that actually exist in the repository directory. - """ + """Return count of files that actually exist in the repository directory.""" existing = 0 basedir = repo.working_tree_dir for f in files: @@ -422,13 +555,10 @@ class TestIndex(TestBase): # END num existing helper - @skipIf( - HIDE_WINDOWS_KNOWN_ERRORS and Git.is_cygwin(), - """FIXME: File "C:\\projects\\gitpython\\git\\test\\test_index.py", line 642, in test_index_mutation - self.assertEqual(fd.read(), link_target) - AssertionError: '!<symlink>\xff\xfe/\x00e\x00t\x00c\x00/\x00t\x00h\x00a\x00t\x00\x00\x00' - != '/etc/that' - """, + @pytest.mark.xfail( + sys.platform == "win32" and Git().config("core.symlinks") == "true", + reason="Assumes symlinks are not created on Windows and opens a symlink to a nonexistent target.", + raises=FileNotFoundError, ) @with_rw_repo("0.1.6") def test_index_mutation(self, rw_repo): @@ -443,19 +573,21 @@ class TestIndex(TestBase): writer.set_value("user", "email", umail) self.assertEqual(writer.get_value("user", "name"), uname) - # remove all of the files, provide a wild mix of paths, BaseIndexEntries, - # IndexEntries + # Remove all of the files, provide a wild mix of paths, BaseIndexEntries, + # IndexEntries. 
def mixed_iterator(): count = 0 for entry in index.entries.values(): - type_id = count % 4 - if type_id == 0: # path + type_id = count % 5 + if type_id == 0: # path (str) yield entry.path - elif type_id == 1: # blob + elif type_id == 1: # path (PathLike) + yield Path(entry.path) + elif type_id == 2: # blob yield Blob(rw_repo, entry.binsha, entry.mode, entry.path) - elif type_id == 2: # BaseIndexEntry + elif type_id == 3: # BaseIndexEntry yield BaseIndexEntry(entry[:4]) - elif type_id == 3: # IndexEntry + elif type_id == 4: # IndexEntry yield entry else: raise AssertionError("Invalid Type") @@ -468,29 +600,29 @@ class TestIndex(TestBase): self.assertEqual(self._count_existing(rw_repo, deleted_files), len(deleted_files)) self.assertEqual(len(index.entries), 0) - # reset the index to undo our changes + # Reset the index to undo our changes. index.reset() self.assertEqual(len(index.entries), num_entries) - # remove with working copy + # Remove with working copy. deleted_files = index.remove(mixed_iterator(), working_tree=True) assert deleted_files self.assertEqual(self._count_existing(rw_repo, deleted_files), 0) - # reset everything + # Reset everything. index.reset(working_tree=True) self.assertEqual(self._count_existing(rw_repo, deleted_files), len(deleted_files)) - # invalid type + # Invalid type. self.assertRaises(TypeError, index.remove, [1]) - # absolute path + # Absolute path. deleted_files = index.remove([osp.join(rw_repo.working_tree_dir, "lib")], r=True) assert len(deleted_files) > 1 self.assertRaises(ValueError, index.remove, ["/doesnt/exists"]) # TEST COMMITTING - # commit changed index + # Commit changed index. cur_commit = cur_head.commit commit_message = "commit default head by Frèderic Çaufl€" @@ -505,7 +637,7 @@ class TestIndex(TestBase): self.assertEqual(len(new_commit.parents), 1) self.assertEqual(cur_head.commit, cur_commit) - # commit with other actor + # Commit with other actor. 
cur_commit = cur_head.commit my_author = Actor("Frèderic Çaufl€", "author@example.com") @@ -522,7 +654,7 @@ class TestIndex(TestBase): self.assertEqual(cur_head.commit, commit_actor) self.assertEqual(cur_head.log()[-1].actor, my_committer) - # commit with author_date and commit_date + # Commit with author_date and commit_date. cur_commit = cur_head.commit commit_message = "commit with dates by Avinash Sajjanshetty" @@ -537,14 +669,14 @@ class TestIndex(TestBase): self.assertEqual(new_commit.authored_date, 1144447993) self.assertEqual(new_commit.committed_date, 1112911993) - # same index, no parents + # Same index, no parents. commit_message = "index without parents" commit_no_parents = index.commit(commit_message, parent_commits=[], head=True) self.assertEqual(commit_no_parents.message, commit_message) self.assertEqual(len(commit_no_parents.parents), 0) self.assertEqual(cur_head.commit, commit_no_parents) - # same index, multiple parents + # same index, multiple parents. commit_message = "Index with multiple parents\n commit with another line" commit_multi_parent = index.commit(commit_message, parent_commits=(commit_no_parents, new_commit)) self.assertEqual(commit_multi_parent.message, commit_message) @@ -553,41 +685,41 @@ class TestIndex(TestBase): self.assertEqual(commit_multi_parent.parents[1], new_commit) self.assertEqual(cur_head.commit, commit_multi_parent) - # re-add all files in lib - # get the lib folder back on disk, but get an index without it + # Re-add all files in lib. + # Get the lib folder back on disk, but get an index without it. index.reset(new_commit.parents[0], working_tree=True).reset(new_commit, working_tree=False) lib_file_path = osp.join("lib", "git", "__init__.py") assert (lib_file_path, 0) not in index.entries assert osp.isfile(osp.join(rw_repo.working_tree_dir, lib_file_path)) - # directory + # Directory. 
entries = index.add(["lib"], fprogress=self._fprogress_add) self._assert_entries(entries) self._assert_fprogress(entries) assert len(entries) > 1 - # glob + # Glob. entries = index.reset(new_commit).add([osp.join("lib", "git", "*.py")], fprogress=self._fprogress_add) self._assert_entries(entries) self._assert_fprogress(entries) self.assertEqual(len(entries), 14) - # same file + # Same file. entries = index.reset(new_commit).add( [osp.join(rw_repo.working_tree_dir, "lib", "git", "head.py")] * 2, fprogress=self._fprogress_add, ) self._assert_entries(entries) self.assertEqual(entries[0].mode & 0o644, 0o644) - # would fail, test is too primitive to handle this case + # Would fail, test is too primitive to handle this case. # self._assert_fprogress(entries) self._reset_progress() self.assertEqual(len(entries), 2) - # missing path + # Missing path. self.assertRaises(OSError, index.reset(new_commit).add, ["doesnt/exist/must/raise"]) - # blob from older revision overrides current index revision + # Blob from older revision overrides current index revision. old_blob = new_commit.parents[0].tree.blobs[0] entries = index.reset(new_commit).add([old_blob], fprogress=self._fprogress_add) self._assert_entries(entries) @@ -595,7 +727,7 @@ class TestIndex(TestBase): self.assertEqual(index.entries[(old_blob.path, 0)].hexsha, old_blob.hexsha) self.assertEqual(len(entries), 1) - # mode 0 not allowed + # Mode 0 not allowed. null_hex_sha = Diff.NULL_HEX_SHA null_bin_sha = b"\0" * 20 self.assertRaises( @@ -604,7 +736,7 @@ class TestIndex(TestBase): [BaseIndexEntry((0, null_bin_sha, 0, "doesntmatter"))], ) - # add new file + # Add new file. new_file_relapath = "my_new_file" self._make_file(new_file_relapath, "hello world", rw_repo) entries = index.reset(new_commit).add( @@ -616,8 +748,8 @@ class TestIndex(TestBase): self.assertEqual(len(entries), 1) self.assertNotEqual(entries[0].hexsha, null_hex_sha) - # add symlink - if not is_win: + # Add symlink. 
+ if sys.platform != "win32": for target in ("/etc/nonexisting", "/etc/passwd", "/etc"): basename = "my_real_symlink" @@ -630,17 +762,17 @@ class TestIndex(TestBase): self.assertTrue(S_ISLNK(entries[0].mode)) self.assertTrue(S_ISLNK(index.entries[index.entry_key("my_real_symlink", 0)].mode)) - # we expect only the target to be written + # We expect only the target to be written. self.assertEqual( index.repo.odb.stream(entries[0].binsha).read().decode("ascii"), target, ) os.remove(link_file) - # end for each target + # END for each target # END real symlink test - # add fake symlink and assure it checks-our as symlink + # Add fake symlink and assure it checks out as a symlink. fake_symlink_relapath = "my_fake_symlink" link_target = "/etc/that" fake_symlink_path = self._make_file(fake_symlink_relapath, link_target, rw_repo) @@ -652,7 +784,7 @@ class TestIndex(TestBase): self.assertEqual(len(entries), 1) self.assertTrue(S_ISLNK(entries[0].mode)) - # assure this also works with an alternate method + # Check that this also works with an alternate method. full_index_entry = IndexEntry.from_base(BaseIndexEntry((0o120000, entries[0].binsha, 0, entries[0].path))) entry_key = index.entry_key(full_index_entry) index.reset(new_commit) @@ -660,24 +792,24 @@ class TestIndex(TestBase): assert entry_key not in index.entries index.entries[entry_key] = full_index_entry index.write() - index.update() # force reread of entries + index.update() # Force reread of entries. new_entry = index.entries[entry_key] assert S_ISLNK(new_entry.mode) - # a tree created from this should contain the symlink + # A tree created from this should contain the symlink. tree = index.write_tree() assert fake_symlink_relapath in tree - index.write() # flush our changes for the checkout + index.write() # Flush our changes for the checkout. - # checkout the fakelink, should be a link then + # Check out the fake link, should be a link then. 
assert not S_ISLNK(os.stat(fake_symlink_path)[ST_MODE]) os.remove(fake_symlink_path) index.checkout(fake_symlink_path) - # on windows we will never get symlinks - if is_win: - # simlinks should contain the link as text ( which is what a - # symlink actually is ) + # On Windows, we currently assume we will never get symlinks. + if sys.platform == "win32": + # Symlinks should contain the link as text (which is what a + # symlink actually is). with open(fake_symlink_path, "rt") as fd: self.assertEqual(fd.read(), link_target) else: @@ -692,24 +824,24 @@ class TestIndex(TestBase): # END move assertion utility self.assertRaises(ValueError, index.move, ["just_one_path"]) - # file onto existing file + # Try to move a file onto an existing file. files = ["AUTHORS", "LICENSE"] self.assertRaises(GitCommandError, index.move, files) - # again, with force + # Again, with force. assert_mv_rval(index.move(files, f=True)) - # files into directory - dry run + # Move files into a directory - dry run. paths = ["LICENSE", "VERSION", "doc"] rval = index.move(paths, dry_run=True) self.assertEqual(len(rval), 2) assert osp.exists(paths[0]) - # again, no dry run + # Again, no dry run. rval = index.move(paths) assert_mv_rval(rval) - # dir into dir + # Move dir into dir. rval = index.move(["doc", "test"]) assert_mv_rval(rval) @@ -725,7 +857,7 @@ class TestIndex(TestBase): # END rewriter def make_paths(): - # two existing ones, one new one + """Help out the test by yielding two existing paths and one new path.""" yield "CHANGES" yield "ez_setup.py" yield index.entries[index.entry_key("README", 0)] @@ -766,12 +898,12 @@ class TestIndex(TestBase): for fkey in keys: assert fkey in index.entries - # just the index + # Just the index. index.reset(paths=(arela, afile)) assert akey not in index.entries assert bkey in index.entries - # now with working tree - files on disk as well as entries must be recreated + # Now with working tree - files on disk as well as entries must be recreated. 
rw_repo.head.commit = nc for absfile in absfiles: os.remove(absfile) @@ -785,8 +917,8 @@ class TestIndex(TestBase): @with_rw_repo("HEAD") def test_compare_write_tree(self, rw_repo): - # write all trees and compare them - # its important to have a few submodules in there too + """Test writing all trees, comparing them for equality.""" + # It's important to have a few submodules in there too. max_count = 25 count = 0 for commit in rw_repo.head.commit.traverse(): @@ -819,10 +951,10 @@ class TestIndex(TestBase): @with_rw_repo("HEAD", bare=True) def test_index_bare_add(self, rw_bare_repo): - # Something is wrong after cloning to a bare repo, reading the - # property rw_bare_repo.working_tree_dir will return '/tmp' - # instead of throwing the Exception we are expecting. This is - # a quick hack to make this test fail when expected. + # Something is wrong after cloning to a bare repo, reading the property + # rw_bare_repo.working_tree_dir will return '/tmp' instead of throwing the + # Exception we are expecting. This is a quick hack to make this test fail when + # expected. assert rw_bare_repo.working_tree_dir is None assert rw_bare_repo.bare contents = b"This is a BytesIO file" @@ -847,7 +979,8 @@ class TestIndex(TestBase): @with_rw_directory def test_add_utf8P_path(self, rw_dir): - # NOTE: fp is not a Unicode object in python 2 (which is the source of the problem) + # NOTE: fp is not a Unicode object in Python 2 + # (which is the source of the problem). fp = osp.join(rw_dir, "ø.txt") with open(fp, "wb") as fs: fs.write("content of ø".encode("utf-8")) @@ -858,7 +991,7 @@ class TestIndex(TestBase): @with_rw_directory def test_add_a_file_with_wildcard_chars(self, rw_dir): - # see issue #407 + # See issue #407. 
fp = osp.join(rw_dir, "[.exe") with open(fp, "wb") as f: f.write(b"something") @@ -870,7 +1003,7 @@ class TestIndex(TestBase): def test__to_relative_path_at_root(self): root = osp.abspath(os.sep) - class Mocked(object): + class Mocked: bare = False git_dir = root working_tree_dir = root @@ -882,12 +1015,86 @@ class TestIndex(TestBase): rel = index._to_relative_path(path) self.assertEqual(rel, os.path.relpath(path, root)) + @pytest.mark.xfail( + type(_win_bash_status) is WinBashStatus.Absent, + reason="Can't run a hook on Windows without bash.exe.", + raises=HookExecutionError, + ) + @pytest.mark.xfail( + type(_win_bash_status) is WinBashStatus.WslNoDistro, + reason="Currently uses the bash.exe of WSL, even with no WSL distro installed", + raises=HookExecutionError, + ) + @with_rw_repo("HEAD", bare=True) + def test_run_commit_hook(self, rw_repo): + index = rw_repo.index + _make_hook(index.repo.git_dir, "fake-hook", "echo 'ran fake hook' >output.txt") + run_commit_hook("fake-hook", index) + output = Path(rw_repo.git_dir, "output.txt").read_text(encoding="utf-8") + self.assertEqual(output, "ran fake hook\n") + + @ddt.data((False,), (True,)) + @with_rw_directory + def test_hook_uses_shell_not_from_cwd(self, rw_dir, case): + (chdir_to_repo,) = case + + shell_name = "bash.exe" if sys.platform == "win32" else "sh" + maybe_chdir = cwd(rw_dir) if chdir_to_repo else contextlib.nullcontext() + repo = Repo.init(rw_dir) + + # We need an impostor shell that works on Windows and that the test can + # distinguish from the real bash.exe. But even if the real bash.exe is absent or + # unusable, we should verify the impostor is not run. So the impostor needs a + # clear side effect (unlike in TestGit.test_it_executes_git_not_from_cwd). Popen + # on Windows uses CreateProcessW, which disregards PATHEXT; the impostor may + # need to be a binary executable to ensure the vulnerability is found if + # present. 
No compiler need exist, shipping a binary in the test suite may + # target the wrong architecture, and generating one in a bespoke way may trigger + # false positive virus scans. So we use a Bash/Python polyglot for the hook and + # use the Python interpreter itself as the bash.exe impostor. But an interpreter + # from a venv may not run when copied outside of it, and a global interpreter + # won't run when copied to a different location if it was installed from the + # Microsoft Store. So we make a new venv in rw_dir and use its interpreter. + venv = VirtualEnvironment(rw_dir, with_pip=False) + shutil.copy(venv.python, Path(rw_dir, shell_name)) + shutil.copy(fixture_path("polyglot"), hook_path("polyglot", repo.git_dir)) + payload = Path(rw_dir, "payload.txt") + + if type(_win_bash_status) in {WinBashStatus.Absent, WinBashStatus.WslNoDistro}: + # The real shell can't run, but the impostor should still not be used. + with self.assertRaises(HookExecutionError): + with maybe_chdir: + run_commit_hook("polyglot", repo.index) + self.assertFalse(payload.exists()) + else: + # The real shell should run, and not the impostor. 
+ with maybe_chdir: + run_commit_hook("polyglot", repo.index) + self.assertFalse(payload.exists()) + output = Path(rw_dir, "output.txt").read_text(encoding="utf-8") + self.assertEqual(output, "Ran intended hook.\n") + + @pytest.mark.xfail( + type(_win_bash_status) is WinBashStatus.Absent, + reason="Can't run a hook on Windows without bash.exe.", + raises=HookExecutionError, + ) + @pytest.mark.xfail( + type(_win_bash_status) is WinBashStatus.WslNoDistro, + reason="Currently uses the bash.exe of WSL, even with no WSL distro installed", + raises=HookExecutionError, + ) @with_rw_repo("HEAD", bare=True) def test_pre_commit_hook_success(self, rw_repo): index = rw_repo.index _make_hook(index.repo.git_dir, "pre-commit", "exit 0") index.commit("This should not fail") + @pytest.mark.xfail( + type(_win_bash_status) is WinBashStatus.WslNoDistro, + reason="Currently uses the bash.exe of WSL, even with no WSL distro installed", + raises=AssertionError, + ) @with_rw_repo("HEAD", bare=True) def test_pre_commit_hook_fail(self, rw_repo): index = rw_repo.index @@ -895,7 +1102,7 @@ class TestIndex(TestBase): try: index.commit("This should fail") except HookExecutionError as err: - if is_win_without_bash: + if type(_win_bash_status) is WinBashStatus.Absent: self.assertIsInstance(err.status, OSError) self.assertEqual(err.command, [hp]) self.assertEqual(err.stdout, "") @@ -910,7 +1117,21 @@ class TestIndex(TestBase): else: raise AssertionError("Should have caught a HookExecutionError") - @skipIf(HIDE_WINDOWS_KNOWN_ERRORS, "TODO: fix hooks execution on Windows: #703") + @pytest.mark.xfail( + type(_win_bash_status) is WinBashStatus.Absent, + reason="Can't run a hook on Windows without bash.exe.", + raises=HookExecutionError, + ) + @pytest.mark.xfail( + type(_win_bash_status) is WinBashStatus.Wsl, + reason="Specifically seems to fail on WSL bash (in spite of #1399)", + raises=AssertionError, + ) + @pytest.mark.xfail( + type(_win_bash_status) is WinBashStatus.WslNoDistro, + reason="Currently 
uses the bash.exe of WSL, even with no WSL distro installed", + raises=HookExecutionError, + ) @with_rw_repo("HEAD", bare=True) def test_commit_msg_hook_success(self, rw_repo): commit_message = "commit default head by Frèderic Çaufl€" @@ -924,6 +1145,11 @@ class TestIndex(TestBase): new_commit = index.commit(commit_message) self.assertEqual(new_commit.message, "{} {}".format(commit_message, from_hook_message)) + @pytest.mark.xfail( + type(_win_bash_status) is WinBashStatus.WslNoDistro, + reason="Currently uses the bash.exe of WSL, even with no WSL distro installed", + raises=AssertionError, + ) @with_rw_repo("HEAD", bare=True) def test_commit_msg_hook_fail(self, rw_repo): index = rw_repo.index @@ -931,7 +1157,7 @@ class TestIndex(TestBase): try: index.commit("This should fail") except HookExecutionError as err: - if is_win_without_bash: + if type(_win_bash_status) is WinBashStatus.Absent: self.assertIsInstance(err.status, OSError) self.assertEqual(err.command, [hp]) self.assertEqual(err.stdout, "") @@ -954,3 +1180,37 @@ class TestIndex(TestBase): file.touch() rw_repo.index.add(file) + + @with_rw_repo("HEAD") + def test_index_add_non_normalized_path(self, rw_repo): + git_dir = Path(rw_repo.git_dir) + + file = git_dir / "file.txt" + file.touch() + non_normalized_path = file.as_posix() + if os.name != "nt": + non_normalized_path = "/" + non_normalized_path[1:].replace("/", "//") + + rw_repo.index.add(non_normalized_path) + + +class TestIndexUtils: + @pytest.mark.parametrize("file_path_type", [str, Path]) + def test_temporary_file_swap(self, tmp_path, file_path_type): + file_path = tmp_path / "foo" + file_path.write_bytes(b"some data") + + with TemporaryFileSwap(file_path_type(file_path)) as ctx: + assert Path(ctx.file_path) == file_path + assert not file_path.exists() + + # Recreate it with new data, so we can observe that they're really separate. 
+ file_path.write_bytes(b"other data") + + temp_file_path = Path(ctx.tmp_file_path) + assert temp_file_path.parent == file_path.parent + assert temp_file_path.name.startswith(file_path.name) + assert temp_file_path.read_bytes() == b"some data" + + assert not temp_file_path.exists() + assert file_path.read_bytes() == b"some data" # Not b"other data". diff --git a/test/test_installation.py b/test/test_installation.py index 0cb0c71..ae6472e 100644 --- a/test/test_installation.py +++ b/test/test_installation.py @@ -1,50 +1,58 @@ -# This module is part of GitPython and is released under -# the BSD License: https://opensource.org/license/bsd-3-clause/ +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ import ast import os import subprocess -import sys -from git.compat import is_win -from test.lib import TestBase -from test.lib.helper import with_rw_directory +from test.lib import TestBase, VirtualEnvironment, with_rw_directory class TestInstallation(TestBase): - def setUp_venv(self, rw_dir): - self.venv = rw_dir - subprocess.run([sys.executable, "-m", "venv", self.venv], stdout=subprocess.PIPE) - bin_name = "Scripts" if is_win else "bin" - self.python = os.path.join(self.venv, bin_name, "python") - self.pip = os.path.join(self.venv, bin_name, "pip") - self.sources = os.path.join(self.venv, "src") - self.cwd = os.path.dirname(os.path.dirname(__file__)) - os.symlink(self.cwd, self.sources, target_is_directory=True) - @with_rw_directory def test_installation(self, rw_dir): - self.setUp_venv(rw_dir) + venv = self._set_up_venv(rw_dir) + result = subprocess.run( - [self.pip, "install", "."], + [venv.pip, "install", "."], stdout=subprocess.PIPE, - cwd=self.sources, + cwd=venv.sources, ) self.assertEqual( 0, result.returncode, msg=result.stderr or result.stdout or "Can't install project", ) - result = subprocess.run([self.python, "-c", "import git"], stdout=subprocess.PIPE, cwd=self.sources) + + 
result = subprocess.run( + [venv.python, "-c", "import git"], + stdout=subprocess.PIPE, + cwd=venv.sources, + ) self.assertEqual( 0, result.returncode, - msg=result.stderr or result.stdout or "Selftest failed", + msg=result.stderr or result.stdout or "Self-test failed", ) + result = subprocess.run( - [self.python, "-c", "import sys;import git; print(sys.path)"], + [venv.python, "-c", "import gitdb; import smmap"], stdout=subprocess.PIPE, - cwd=self.sources, + cwd=venv.sources, + ) + self.assertEqual( + 0, + result.returncode, + msg=result.stderr or result.stdout or "Dependencies not installed", + ) + + # Even IF gitdb or any other dependency is supplied during development by + # inserting its location into PYTHONPATH or otherwise patched into sys.path, + # make sure it is not wrongly inserted as the *first* entry. + result = subprocess.run( + [venv.python, "-c", "import sys; import git; print(sys.path)"], + stdout=subprocess.PIPE, + cwd=venv.sources, ) syspath = result.stdout.decode("utf-8").splitlines()[0] syspath = ast.literal_eval(syspath) @@ -53,4 +61,13 @@ class TestInstallation(TestBase): syspath[0], msg="Failed to follow the conventions for https://docs.python.org/3/library/sys.html#sys.path", ) - self.assertTrue(syspath[1].endswith("gitdb"), msg="Failed to add gitdb to sys.path") + + @staticmethod + def _set_up_venv(rw_dir): + venv = VirtualEnvironment(rw_dir, with_pip=True) + os.symlink( + os.path.dirname(os.path.dirname(__file__)), + venv.sources, + target_is_directory=True, + ) + return venv diff --git a/test/test_quick_doc.py b/test/test_quick_doc.py index 342a7f2..4ef75f4 100644 --- a/test/test_quick_doc.py +++ b/test/test_quick_doc.py @@ -1,11 +1,14 @@ +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ + +import gc + from test.lib import TestBase from test.lib.helper import with_rw_directory class QuickDoc(TestBase): def tearDown(self): - import gc - gc.collect() 
@with_rw_directory @@ -28,7 +31,7 @@ class QuickDoc(TestBase): def test_cloned_repo_object(self, local_dir): from git import Repo - # code to clone from url + # Code to clone from url # [1-test_cloned_repo_object] # $ git clone <url> <local_dir> @@ -37,7 +40,7 @@ class QuickDoc(TestBase): repo = Repo.clone_from(repo_url, local_dir) # ![1-test_cloned_repo_object] - # code to add files + # Code to add files # [2-test_cloned_repo_object] # We must make a change to a file so that we can add the update to git @@ -52,7 +55,7 @@ class QuickDoc(TestBase): repo.index.add(add_file) # notice the add function requires a list of paths # ![3-test_cloned_repo_object] - # code to commit - not sure how to test this + # Code to commit - not sure how to test this # [4-test_cloned_repo_object] # $ git commit -m <message> repo.index.commit("Update to file2") @@ -61,8 +64,8 @@ class QuickDoc(TestBase): # [5-test_cloned_repo_object] # $ git log <file> - # relative path from git root - repo.iter_commits(all=True, max_count=10, paths=update_file) # gets the last 10 commits from all branches + # Relative path from git root + repo.iter_commits(all=True, max_count=10, paths=update_file) # Gets the last 10 commits from all branches. # Outputs: <generator object Commit._iter_from_process_or_stream at 0x7fb66c186cf0> @@ -79,7 +82,7 @@ class QuickDoc(TestBase): # Untracked files - create new file # [7-test_cloned_repo_object] - f = open(f"{local_dir}/untracked.txt", "w") # creates an empty file + f = open(f"{local_dir}/untracked.txt", "w") # Creates an empty file. f.close() # ![7-test_cloned_repo_object] @@ -90,14 +93,14 @@ class QuickDoc(TestBase): # Modified files # [9-test_cloned_repo_object] - # Let's modify one of our tracked files + # Let's modify one of our tracked files. with open(f"{local_dir}/Downloads/file3.txt", "w") as f: - f.write("file3 version 2") # overwrite file 3 + f.write("file3 version 2") # Overwrite file 3. 
# ![9-test_cloned_repo_object] # [10-test_cloned_repo_object] - repo.index.diff(None) # compares staging area to working directory + repo.index.diff(None) # Compares staging area to working directory. # Output: [<git.diff.Diff object at 0x7fb66c076e50>, # <git.diff.Diff object at 0x7fb66c076ca0>] @@ -112,7 +115,7 @@ class QuickDoc(TestBase): # Downloads/file3.txt # ![11-test_cloned_repo_object] - # compares staging area to head commit + # Compares staging area to head commit # [11.1-test_cloned_repo_object] diffs = repo.index.diff(repo.head.commit) for d in diffs: @@ -122,7 +125,7 @@ class QuickDoc(TestBase): # ![11.1-test_cloned_repo_object] # [11.2-test_cloned_repo_object] - # lets add untracked.txt + # Let's add untracked.txt. repo.index.add(["untracked.txt"]) diffs = repo.index.diff(repo.head.commit) for d in diffs: @@ -152,7 +155,7 @@ class QuickDoc(TestBase): # Previous commit tree # [13-test_cloned_repo_object] - prev_commits = list(repo.iter_commits(all=True, max_count=10)) # last 10 commits from all branches + prev_commits = list(repo.iter_commits(all=True, max_count=10)) # Last 10 commits from all branches. tree = prev_commits[0].tree # ![13-test_cloned_repo_object] @@ -191,29 +194,29 @@ class QuickDoc(TestBase): # Printing text files # [17-test_cloned_repo_object] print_file = "dir1/file2.txt" - tree[print_file] # the head commit tree + tree[print_file] # The head commit tree. 
# Output <git.Blob "SHA1-HEX-HASH"> # ![17-test_cloned_repo_object] - # print latest file + # Print latest file # [18-test_cloned_repo_object] blob = tree[print_file] print(blob.data_stream.read().decode()) # Output - # file 2 version 1 + # File 2 version 1 # Update version 2 # ![18-test_cloned_repo_object] - # print previous tree + # Print previous tree # [18.1-test_cloned_repo_object] commits_for_file = list(repo.iter_commits(all=True, paths=print_file)) - tree = commits_for_file[-1].tree # gets the first commit tree + tree = commits_for_file[-1].tree # Gets the first commit tree. blob = tree[print_file] print(blob.data_stream.read().decode()) # Output - # file 2 version 1 + # File 2 version 1 # ![18.1-test_cloned_repo_object] diff --git a/test/test_reflog.py b/test/test_reflog.py index e899ac4..7ce6421 100644 --- a/test/test_reflog.py +++ b/test/test_reflog.py @@ -1,12 +1,14 @@ -import os +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ + +import os.path as osp import tempfile from git.objects import IndexObject -from git.refs import RefLogEntry, RefLog -from test.lib import TestBase, fixture_path -from git.util import Actor, rmtree, hex_to_bin +from git.refs import RefLog, RefLogEntry +from git.util import Actor, hex_to_bin, rmtree -import os.path as osp +from test.lib import TestBase, fixture_path class TestRefLog(TestBase): @@ -26,39 +28,38 @@ class TestRefLog(TestBase): assert e.time[1] == 1 assert e.message == msg - # check representation (roughly) + # Check representation (roughly). assert repr(e).startswith(nullhexsha) def test_base(self): rlp_head = fixture_path("reflog_HEAD") rlp_master = fixture_path("reflog_master") - tdir = tempfile.mktemp(suffix="test_reflogs") - os.mkdir(tdir) + tdir = tempfile.mkdtemp(suffix="test_reflogs") rlp_master_ro = RefLog.path(self.rorepo.head) assert osp.isfile(rlp_master_ro) - # simple read + # Simple read. 
reflog = RefLog.from_file(rlp_master_ro) assert reflog._path is not None assert isinstance(reflog, RefLog) assert len(reflog) - # iter_entries works with path and with stream + # iter_entries works with path and with stream. assert len(list(RefLog.iter_entries(open(rlp_master, "rb")))) assert len(list(RefLog.iter_entries(rlp_master))) - # raise on invalid revlog - # TODO: Try multiple corrupted ones ! + # Raise on invalid revlog. + # TODO: Try multiple corrupted ones! pp = "reflog_invalid_" for suffix in ("oldsha", "newsha", "email", "date", "sep"): self.assertRaises(ValueError, RefLog.from_file, fixture_path(pp + suffix)) # END for each invalid file - # cannot write an uninitialized reflog + # Cannot write an uninitialized reflog. self.assertRaises(ValueError, RefLog().write) - # test serialize and deserialize - results must match exactly + # Test serialize and deserialize - results must match exactly. binsha = hex_to_bin(("f" * 40).encode("ascii")) msg = "my reflog message" cr = self.rorepo.config_reader() @@ -68,33 +69,33 @@ class TestRefLog(TestBase): reflog.to_file(tfile) assert reflog.write() is reflog - # parsed result must match ... + # Parsed result must match... treflog = RefLog.from_file(tfile) assert treflog == reflog - # ... as well as each bytes of the written stream + # ...as well as each bytes of the written stream. assert open(tfile).read() == open(rlp).read() - # append an entry + # Append an entry. entry = RefLog.append_entry(cr, tfile, IndexObject.NULL_BIN_SHA, binsha, msg) assert entry.oldhexsha == IndexObject.NULL_HEX_SHA assert entry.newhexsha == "f" * 40 assert entry.message == msg assert RefLog.from_file(tfile)[-1] == entry - # index entry - # raises on invalid index + # Index entry. + # Raises on invalid index. self.assertRaises(IndexError, RefLog.entry_at, rlp, 10000) - # indices can be positive ... + # Indices can be positive... assert isinstance(RefLog.entry_at(rlp, 0), RefLogEntry) RefLog.entry_at(rlp, 23) - # ... 
and negative + # ...and negative. for idx in (-1, -24): RefLog.entry_at(rlp, idx) # END for each index to read # END for each reflog - # finally remove our temporary data + # Finally remove our temporary data. rmtree(tdir) diff --git a/test/test_refs.py b/test/test_refs.py index f9fc8b0..08096e6 100644 --- a/test/test_refs.py +++ b/test/test_refs.py @@ -1,36 +1,36 @@ -# test_refs.py # Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors # -# This module is part of GitPython and is released under -# the BSD License: https://opensource.org/license/bsd-3-clause/ +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ from itertools import chain +import os.path as osp from pathlib import Path +import tempfile + +from gitdb.exc import BadName from git import ( - Reference, - Head, - TagReference, - RemoteReference, Commit, - SymbolicReference, GitCommandError, - RefLog, GitConfigParser, + Head, + RefLog, + Reference, + RemoteReference, + SymbolicReference, + TagReference, ) from git.objects.tag import TagObject -from test.lib import TestBase, with_rw_repo +import git.refs as refs from git.util import Actor -from gitdb.exc import BadName -import git.refs as refs -import os.path as osp -import tempfile +from test.lib import TestBase, with_rw_repo class TestRefs(TestBase): def test_from_path(self): - # should be able to create any reference directly + # Should be able to create any reference directly. for ref_type in (Reference, Head, TagReference, RemoteReference): for name in ("rela_name", "path/rela_name"): full_path = ref_type.to_full_path(name) @@ -39,9 +39,9 @@ class TestRefs(TestBase): # END for each name # END for each type - # invalid path + # Invalid path. self.assertRaises(ValueError, TagReference, self.rorepo, "refs/invalid/tag") - # works without path check + # Works without path check. 
TagReference(self.rorepo, "refs/invalid/tag", check_path=False) def test_tag_base(self): @@ -53,7 +53,7 @@ class TestRefs(TestBase): if tag.tag is not None: tag_object_refs.append(tag) tagobj = tag.tag - # have no dict + # Have no dict. self.assertRaises(AttributeError, setattr, tagobj, "someattr", 1) assert isinstance(tagobj, TagObject) assert tagobj.tag == tag.name @@ -62,7 +62,7 @@ class TestRefs(TestBase): assert isinstance(tagobj.tagger_tz_offset, int) assert tagobj.message assert tag.object == tagobj - # can't assign the object + # Can't assign the object. self.assertRaises(AttributeError, setattr, tag, "object", tagobj) # END if we have a tag object # END for tag in repo-tags @@ -77,7 +77,7 @@ class TestRefs(TestBase): assert tagger_name == "Michael Trier" def test_tags(self): - # tag refs can point to tag objects or to commits + # Tag refs can point to tag objects or to commits. s = set() ref_count = 0 for ref in chain(self.rorepo.tags, self.rorepo.heads): @@ -100,8 +100,8 @@ class TestRefs(TestBase): assert "refs/heads" in head.path prev_object = head.object cur_object = head.object - assert prev_object == cur_object # represent the same git object - assert prev_object is not cur_object # but are different instances + assert prev_object == cur_object # Represent the same git object... + assert prev_object is not cur_object # ...but are different instances. with head.config_writer() as writer: tv = "testopt" @@ -111,7 +111,7 @@ class TestRefs(TestBase): with head.config_writer() as writer: writer.remove_option(tv) - # after the clone, we might still have a tracking branch setup + # After the clone, we might still have a tracking branch setup. 
head.set_tracking_branch(None) assert head.tracking_branch() is None remote_ref = rwrepo.remotes[0].refs[0] @@ -123,7 +123,7 @@ class TestRefs(TestBase): special_name = "feature#123" special_name_remote_ref = SymbolicReference.create(rwrepo, "refs/remotes/origin/%s" % special_name) gp_tracking_branch = rwrepo.create_head("gp_tracking#123") - special_name_remote_ref = rwrepo.remotes[0].refs[special_name] # get correct type + special_name_remote_ref = rwrepo.remotes[0].refs[special_name] # Get correct type. gp_tracking_branch.set_tracking_branch(special_name_remote_ref) TBranch = gp_tracking_branch.tracking_branch() if TBranch is not None: @@ -136,7 +136,7 @@ class TestRefs(TestBase): assert TBranch.name == special_name_remote_ref.name # END for each head - # verify REFLOG gets altered + # Verify REFLOG gets altered. head = rwrepo.head cur_head = head.ref cur_commit = cur_head.commit @@ -144,32 +144,31 @@ class TestRefs(TestBase): hlog_len = len(head.log()) blog_len = len(cur_head.log()) assert head.set_reference(pcommit, "detached head") is head - # one new log-entry + # One new log-entry. thlog = head.log() assert len(thlog) == hlog_len + 1 assert thlog[-1].oldhexsha == cur_commit.hexsha assert thlog[-1].newhexsha == pcommit.hexsha - # the ref didn't change though + # The ref didn't change though. assert len(cur_head.log()) == blog_len - # head changes once again, cur_head doesn't change + # head changes once again, cur_head doesn't change. head.set_reference(cur_head, "reattach head") assert len(head.log()) == hlog_len + 2 assert len(cur_head.log()) == blog_len - # adjusting the head-ref also adjust the head, so both reflogs are - # altered + # Adjusting the head-ref also adjust the head, so both reflogs are altered. cur_head.set_commit(pcommit, "changing commit") assert len(cur_head.log()) == blog_len + 1 assert len(head.log()) == hlog_len + 3 - # with automatic dereferencing + # With automatic dereferencing. 
assert head.set_commit(cur_commit, "change commit once again") is head assert len(head.log()) == hlog_len + 4 assert len(cur_head.log()) == blog_len + 2 - # a new branch has just a single entry + # A new branch has just a single entry. other_head = Head.create(rwrepo, "mynewhead", pcommit, logmsg="new head created") log = other_head.log() assert len(log) == 1 @@ -178,7 +177,7 @@ class TestRefs(TestBase): @with_rw_repo("HEAD", bare=False) def test_set_tracking_branch_with_import(self, rwrepo): - # prepare included config file + # Prepare included config file. included_config = osp.join(rwrepo.git_dir, "config.include") with GitConfigParser(included_config, read_only=False) as writer: writer.set_value("test", "value", "test") @@ -209,7 +208,7 @@ class TestRefs(TestBase): assert not SymbolicReference(self.rorepo, "hellothere").is_valid() def test_orig_head(self): - assert type(self.rorepo.head.orig_head()) == SymbolicReference + assert type(self.rorepo.head.orig_head()) is SymbolicReference @with_rw_repo("0.1.6") def test_head_checkout_detached_head(self, rw_repo): @@ -230,11 +229,11 @@ class TestRefs(TestBase): cur_head.reset(new_head_commit, index=True, working_tree=True) # index + wt assert cur_head.reference.commit == new_head_commit - # paths - make sure we have something to do + # Paths - make sure we have something to do. rw_repo.index.reset(old_head_commit.parents[0]) cur_head.reset(cur_head, paths="test") cur_head.reset(new_head_commit, paths="lib") - # hard resets with paths don't work, its all or nothing + # Hard resets with paths don't work; it's all or nothing. self.assertRaises( GitCommandError, cur_head.reset, @@ -243,12 +242,12 @@ class TestRefs(TestBase): paths="lib", ) - # we can do a mixed reset, and then checkout from the index though + # We can do a mixed reset, and then checkout from the index though. 
cur_head.reset(new_head_commit) rw_repo.index.checkout(["lib"], force=True) - # now that we have a write write repo, change the HEAD reference - its - # like git-reset --soft + # Now that we have a write write repo, change the HEAD reference - it's like + # "git-reset --soft". heads = rw_repo.heads assert heads for head in heads: @@ -259,7 +258,7 @@ class TestRefs(TestBase): assert not cur_head.is_detached # END for each head - # detach + # Detach. active_head = heads[0] curhead_commit = active_head.commit cur_head.reference = curhead_commit @@ -267,20 +266,20 @@ class TestRefs(TestBase): assert cur_head.is_detached self.assertRaises(TypeError, getattr, cur_head, "reference") - # tags are references, hence we can point to them + # Tags are references, hence we can point to them. some_tag = rw_repo.tags[0] cur_head.reference = some_tag assert not cur_head.is_detached assert cur_head.commit == some_tag.commit assert isinstance(cur_head.reference, TagReference) - # put HEAD back to a real head, otherwise everything else fails + # Put HEAD back to a real head, otherwise everything else fails. cur_head.reference = active_head - # type check + # Type check. self.assertRaises(ValueError, setattr, cur_head, "reference", "that") - # head handling + # Head handling. commit = "HEAD" prev_head_commit = cur_head.commit for count, new_name in enumerate(("my_new_head", "feature/feature1")): @@ -289,13 +288,13 @@ class TestRefs(TestBase): assert new_head.is_detached assert cur_head.commit == prev_head_commit assert isinstance(new_head, Head) - # already exists, but has the same value, so its fine + # Already exists, but has the same value, so it's fine. Head.create(rw_repo, new_name, new_head.commit) - # its not fine with a different value + # It's not fine with a different value. self.assertRaises(OSError, Head.create, rw_repo, new_name, new_head.commit.parents[0]) - # force it + # Force it. 
new_head = Head.create(rw_repo, new_name, actual_commit, force=True) old_path = new_head.path old_name = new_head.name @@ -304,7 +303,7 @@ class TestRefs(TestBase): assert new_head.rename("hello/world").name == "hello/world" assert new_head.rename(old_name).name == old_name and new_head.path == old_path - # rename with force + # Rename with force. tmp_head = Head.create(rw_repo, "tmphead") self.assertRaises(GitCommandError, tmp_head.rename, new_head) tmp_head.rename(new_head, force=True) @@ -313,15 +312,15 @@ class TestRefs(TestBase): logfile = RefLog.path(tmp_head) assert osp.isfile(logfile) Head.delete(rw_repo, tmp_head) - # deletion removes the log as well + # Deletion removes the log as well. assert not osp.isfile(logfile) heads = rw_repo.heads assert tmp_head not in heads and new_head not in heads - # force on deletion testing would be missing here, code looks okay though ;) + # Force on deletion testing would be missing here, code looks okay though. ;) # END for each new head name self.assertRaises(TypeError, RemoteReference.create, rw_repo, "some_name") - # tag ref + # Tag ref. tag_name = "5.0.2" TagReference.create(rw_repo, tag_name) self.assertRaises(GitCommandError, TagReference.create, rw_repo, tag_name) @@ -331,7 +330,7 @@ class TestRefs(TestBase): assert light_tag.commit == cur_head.commit.parents[0] assert light_tag.tag is None - # tag with tag object + # Tag with tag object. other_tag_name = "releases/1.0.2RC" msg = "my mighty tag\nsecond line" obj_tag = TagReference.create(rw_repo, other_tag_name, message=msg) @@ -344,15 +343,15 @@ class TestRefs(TestBase): tags = rw_repo.tags assert light_tag not in tags and obj_tag not in tags - # remote deletion + # Remote deletion. remote_refs_so_far = 0 remotes = rw_repo.remotes assert remotes for remote in remotes: refs = remote.refs - # If a HEAD exists, it must be deleted first. Otherwise it might - # end up pointing to an invalid ref it the ref was deleted before. 
+ # If a HEAD exists, it must be deleted first. Otherwise it might end up + # pointing to an invalid ref it the ref was deleted before. remote_head_name = "HEAD" if remote_head_name in refs: RemoteReference.delete(rw_repo, refs[remote_head_name]) @@ -367,11 +366,11 @@ class TestRefs(TestBase): assert remote_refs_so_far for remote in remotes: - # remotes without references should produce an empty list + # Remotes without references should produce an empty list. self.assertEqual(remote.refs, []) # END for each remote - # change where the active head points to + # Change where the active head points to. if cur_head.is_detached: cur_head.reference = rw_repo.heads[0] @@ -382,17 +381,17 @@ class TestRefs(TestBase): assert head.commit == cur_head.commit head.commit = old_commit - # setting a non-commit as commit fails, but succeeds as object + # Setting a non-commit as commit fails, but succeeds as object. head_tree = head.commit.tree self.assertRaises(ValueError, setattr, head, "commit", head_tree) - assert head.commit == old_commit # and the ref did not change - # we allow heads to point to any object + assert head.commit == old_commit # And the ref did not change. + # We allow heads to point to any object. head.object = head_tree assert head.object == head_tree - # cannot query tree as commit + # Cannot query tree as commit. self.assertRaises(TypeError, getattr, head, "commit") - # set the commit directly using the head. This would never detach the head + # Set the commit directly using the head. This would never detach the head. assert not cur_head.is_detached head.object = old_commit cur_head.reference = head.commit @@ -408,30 +407,30 @@ class TestRefs(TestBase): assert not cur_head.is_detached assert head.commit == parent_commit - # test checkout + # Test checkout. 
active_branch = rw_repo.active_branch for head in rw_repo.heads: checked_out_head = head.checkout() assert checked_out_head == head # END for each head to checkout - # checkout with branch creation + # Check out with branch creation. new_head = active_branch.checkout(b="new_head") assert active_branch != rw_repo.active_branch assert new_head == rw_repo.active_branch - # checkout with force as we have a changed a file - # clear file + # Checkout with force as we have a changed a file. + # Clear file. open(new_head.commit.tree.blobs[-1].abspath, "w").close() assert len(new_head.commit.diff(None)) - # create a new branch that is likely to touch the file we changed + # Create a new branch that is likely to touch the file we changed. far_away_head = rw_repo.create_head("far_head", "HEAD~100") self.assertRaises(GitCommandError, far_away_head.checkout) assert active_branch == active_branch.checkout(force=True) assert rw_repo.head.reference != far_away_head - # test reference creation + # Test reference creation. partial_ref = "sub/ref" full_ref = "refs/%s" % partial_ref ref = Reference.create(rw_repo, partial_ref) @@ -439,21 +438,21 @@ class TestRefs(TestBase): assert ref.object == rw_repo.head.commit self.assertRaises(OSError, Reference.create, rw_repo, full_ref, "HEAD~20") - # it works if it is at the same spot though and points to the same reference + # It works if it is at the same spot though and points to the same reference. assert Reference.create(rw_repo, full_ref, "HEAD").path == full_ref Reference.delete(rw_repo, full_ref) - # recreate the reference using a full_ref + # Recreate the reference using a full_ref. ref = Reference.create(rw_repo, full_ref) assert ref.path == full_ref assert ref.object == rw_repo.head.commit - # recreate using force + # Recreate using force. ref = Reference.create(rw_repo, partial_ref, "HEAD~1", force=True) assert ref.path == full_ref assert ref.object == rw_repo.head.commit.parents[0] - # rename it + # Rename it. 
orig_obj = ref.object for name in ("refs/absname", "rela_name", "feature/rela_name"): ref_new_name = ref.rename(name) @@ -463,19 +462,19 @@ class TestRefs(TestBase): assert ref_new_name == ref # END for each name type - # References that don't exist trigger an error if we want to access them + # References that don't exist trigger an error if we want to access them. self.assertRaises(ValueError, getattr, Reference(rw_repo, "refs/doesntexist"), "commit") - # exists, fail unless we force + # Exists, fail unless we force. ex_ref_path = far_away_head.path self.assertRaises(OSError, ref.rename, ex_ref_path) - # if it points to the same commit it works + # If it points to the same commit it works. far_away_head.commit = ref.commit ref.rename(ex_ref_path) assert ref.path == ex_ref_path and ref.object == orig_obj assert ref.rename(ref.path).path == ex_ref_path # rename to same name - # create symbolic refs + # Create symbolic refs. symref_path = "symrefs/sym" symref = SymbolicReference.create(rw_repo, symref_path, cur_head.reference) assert symref.path == symref_path @@ -488,20 +487,20 @@ class TestRefs(TestBase): symref_path, cur_head.reference.commit, ) - # it works if the new ref points to the same reference + # It works if the new ref points to the same reference. assert SymbolicReference.create(rw_repo, symref.path, symref.reference).path == symref.path SymbolicReference.delete(rw_repo, symref) - # would raise if the symref wouldn't have been deletedpbl + # Would raise if the symref wouldn't have been deleted (probably). symref = SymbolicReference.create(rw_repo, symref_path, cur_head.reference) - # test symbolic references which are not at default locations like HEAD - # or FETCH_HEAD - they may also be at spots in refs of course + # Test symbolic references which are not at default locations like HEAD or + # FETCH_HEAD - they may also be at spots in refs of course. 
symbol_ref_path = "refs/symbol_ref" symref = SymbolicReference(rw_repo, symbol_ref_path) assert symref.path == symbol_ref_path symbol_ref_abspath = osp.join(rw_repo.git_dir, symref.path) - # set it + # Set it. symref.reference = new_head assert symref.reference == new_head assert osp.isfile(symbol_ref_abspath) @@ -516,10 +515,10 @@ class TestRefs(TestBase): assert not symref.is_detached # END for each ref - # create a new non-head ref just to be sure we handle it even if packed + # Create a new non-head ref just to be sure we handle it even if packed. Reference.create(rw_repo, full_ref) - # test ref listing - assure we have packed refs + # Test ref listing - make sure we have packed refs. rw_repo.git.pack_refs(all=True, prune=True) heads = rw_repo.heads assert heads @@ -527,15 +526,14 @@ class TestRefs(TestBase): assert active_branch in heads assert rw_repo.tags - # we should be able to iterate all symbolic refs as well - in that case - # we should expect only symbolic references to be returned + # We should be able to iterate all symbolic refs as well - in that case we + # should expect only symbolic references to be returned. for symref in SymbolicReference.iter_items(rw_repo): assert not symref.is_detached - # when iterating references, we can get references and symrefs - # when deleting all refs, I'd expect them to be gone ! Even from - # the packed ones - # For this to work, we must not be on any branch + # When iterating references, we can get references and symrefs when deleting all + # refs, I'd expect them to be gone! Even from the packed ones. + # For this to work, we must not be on any branch. rw_repo.head.reference = rw_repo.head.commit deleted_refs = set() for ref in Reference.iter_items(rw_repo): @@ -551,16 +549,15 @@ class TestRefs(TestBase): assert ref not in deleted_refs # END for each ref - # reattach head - head will not be returned if it is not a symbolic - # ref + # Reattach head - head will not be returned if it is not a symbolic ref. 
rw_repo.head.reference = Head.create(rw_repo, "master") - # At least the head should still exist + # At least the head should still exist. assert osp.isfile(osp.join(rw_repo.git_dir, "HEAD")) refs = list(SymbolicReference.iter_items(rw_repo)) assert len(refs) == 1 - # test creation of new refs from scratch + # Test creation of new refs from scratch. for path in ("basename", "dir/somename", "dir2/subdir/basename"): # REFERENCES ############ @@ -570,19 +567,19 @@ class TestRefs(TestBase): ref = Reference(rw_repo, fpath) assert ref == ref_fp - # can be created by assigning a commit + # Can be created by assigning a commit. ref.commit = rw_repo.head.commit assert ref.is_valid() - # if the assignment raises, the ref doesn't exist + # If the assignment raises, the ref doesn't exist. Reference.delete(ref.repo, ref.path) assert not ref.is_valid() self.assertRaises(ValueError, setattr, ref, "commit", "nonsense") assert not ref.is_valid() - # I am sure I had my reason to make it a class method at first, but - # now it doesn't make so much sense anymore, want an instance method as well - # See http://byronimo.lighthouseapp.com/projects/51787-gitpython/tickets/27 + # I am sure I had my reason to make it a class method at first, but now it + # doesn't make so much sense anymore, want an instance method as well. See: + # http://byronimo.lighthouseapp.com/projects/51787-gitpython/tickets/27 Reference.delete(ref.repo, ref.path) assert not ref.is_valid() @@ -614,7 +611,7 @@ class TestRefs(TestBase): assert tag_ref.tag.message == "test2" def test_dereference_recursive(self): - # for now, just test the HEAD + # For now, just test the HEAD. assert SymbolicReference.dereference_recursive(self.rorepo, "HEAD") def test_reflog(self): @@ -622,8 +619,8 @@ class TestRefs(TestBase): def test_refs_outside_repo(self): # Create a file containing a valid reference outside the repository. 
Attempting - # to access it should raise an exception, due to it containing a parent directory - # reference ('..'). This tests for CVE-2023-41040. + # to access it should raise an exception, due to it containing a parent + # directory reference ('..'). This tests for CVE-2023-41040. git_dir = Path(self.rorepo.git_dir) repo_parent_dir = git_dir.parent.parent with tempfile.NamedTemporaryFile(dir=repo_parent_dir) as ref_file: @@ -633,37 +630,52 @@ class TestRefs(TestBase): self.assertRaises(BadName, self.rorepo.commit, f"../../{ref_file_name}") def test_validity_ref_names(self): + """Ensure ref names are checked for validity. + + This is based on the rules specified in: + https://git-scm.com/docs/git-check-ref-format/#_description + """ check_ref = SymbolicReference._check_ref_name_valid - # Based on the rules specified in https://git-scm.com/docs/git-check-ref-format/#_description + # Rule 1 self.assertRaises(ValueError, check_ref, ".ref/begins/with/dot") self.assertRaises(ValueError, check_ref, "ref/component/.begins/with/dot") self.assertRaises(ValueError, check_ref, "ref/ends/with/a.lock") self.assertRaises(ValueError, check_ref, "ref/component/ends.lock/with/period_lock") + # Rule 2 check_ref("valid_one_level_refname") + # Rule 3 self.assertRaises(ValueError, check_ref, "ref/contains/../double/period") + # Rule 4 for c in " ~^:": self.assertRaises(ValueError, check_ref, f"ref/contains/invalid{c}/character") for code in range(0, 32): self.assertRaises(ValueError, check_ref, f"ref/contains/invalid{chr(code)}/ASCII/control_character") self.assertRaises(ValueError, check_ref, f"ref/contains/invalid{chr(127)}/ASCII/control_character") + # Rule 5 for c in "*?[": self.assertRaises(ValueError, check_ref, f"ref/contains/invalid{c}/character") + # Rule 6 self.assertRaises(ValueError, check_ref, "/ref/begins/with/slash") self.assertRaises(ValueError, check_ref, "ref/ends/with/slash/") self.assertRaises(ValueError, check_ref, "ref/contains//double/slash/") + # Rule 7 
self.assertRaises(ValueError, check_ref, "ref/ends/with/dot.") + # Rule 8 self.assertRaises(ValueError, check_ref, "ref/contains@{/at_brace") + # Rule 9 self.assertRaises(ValueError, check_ref, "@") + # Rule 10 self.assertRaises(ValueError, check_ref, "ref/contain\\s/backslash") - # Valid reference name should not raise + + # Valid reference name should not raise. check_ref("valid/ref/name") diff --git a/test/test_remote.py b/test/test_remote.py index 7144b27..5ddb41b 100644 --- a/test/test_remote.py +++ b/test/test_remote.py @@ -1,42 +1,43 @@ -# test_remote.py # Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors # -# This module is part of GitPython and is released under -# the BSD License: https://opensource.org/license/bsd-3-clause/ +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ +import gc +import os.path as osp +from pathlib import Path import random +import sys import tempfile -import pytest from unittest import skipIf +import pytest + from git import ( - RemoteProgress, + Commit, FetchInfo, - Reference, - SymbolicReference, + GitCommandError, Head, - Commit, PushInfo, + Reference, + Remote, + RemoteProgress, RemoteReference, + SymbolicReference, TagReference, - Remote, - GitCommandError, ) from git.cmd import Git -from pathlib import Path from git.exc import UnsafeOptionError, UnsafeProtocolError +from git.util import HIDE_WINDOWS_FREEZE_ERRORS, IterableList, rmtree from test.lib import ( + GIT_DAEMON_PORT, TestBase, - with_rw_repo, - with_rw_and_rw_remote_repo, fixture, - GIT_DAEMON_PORT, + with_rw_and_rw_remote_repo, + with_rw_repo, ) -from git.util import rmtree, HIDE_WINDOWS_FREEZE_ERRORS, IterableList -import os.path as osp - -# assure we have repeatable results +# Make sure we have repeatable results. 
random.seed(0) @@ -44,16 +45,16 @@ class TestRemoteProgress(RemoteProgress): __slots__ = ("_seen_lines", "_stages_per_op", "_num_progress_messages") def __init__(self): - super(TestRemoteProgress, self).__init__() + super().__init__() self._seen_lines = [] self._stages_per_op = {} self._num_progress_messages = 0 def _parse_progress_line(self, line): - # we may remove the line later if it is dropped - # Keep it for debugging + # We may remove the line later if it is dropped. + # Keep it for debugging. self._seen_lines.append(line) - rval = super(TestRemoteProgress, self)._parse_progress_line(line) + rval = super()._parse_progress_line(line) return rval def line_dropped(self, line): @@ -63,7 +64,7 @@ class TestRemoteProgress(RemoteProgress): pass def update(self, op_code, cur_count, max_count=None, message=""): - # check each stage only comes once + # Check each stage only comes once. op_id = op_code & self.OP_MASK assert op_id in (self.COUNTING, self.COMPRESSING, self.WRITING) @@ -85,15 +86,15 @@ class TestRemoteProgress(RemoteProgress): self._num_progress_messages += 1 def make_assertion(self): - # we don't always receive messages + # We don't always receive messages. if not self._seen_lines: return - # sometimes objects are not compressed which is okay + # Sometimes objects are not compressed which is okay. assert len(self._seen_ops) in (2, 3), len(self._seen_ops) assert self._stages_per_op - # must have seen all stages + # Must have seen all stages. for _op, stages in self._stages_per_op.items(): assert stages & self.STAGE_MASK == self.STAGE_MASK # END for each op/stage @@ -104,8 +105,6 @@ class TestRemoteProgress(RemoteProgress): class TestRemote(TestBase): def tearDown(self): - import gc - gc.collect() def _print_fetchhead(self, repo): @@ -151,7 +150,7 @@ class TestRemote(TestBase): # END for each bitflag self.assertTrue(has_one) else: - # there must be a remote commit + # There must be a remote commit. 
if info.flags & info.DELETED == 0: self.assertIsInstance(info.local_ref, Reference) else: @@ -163,7 +162,7 @@ class TestRemote(TestBase): if any(info.flags & info.ERROR for info in results): self.assertRaises(GitCommandError, results.raise_if_error) else: - # No errors, so this should do nothing + # No errors, so this should do nothing. results.raise_if_error() def _do_test_fetch_info(self, repo): @@ -177,8 +176,10 @@ class TestRemote(TestBase): ) def _commit_random_file(self, repo): - # Create a file with a random name and random data and commit it to repo. - # Return the committed absolute file path + """Create a file with a random name and random data and commit it to a repo. + + :return: The committed absolute file path. + """ index = repo.index new_file = self._make_file(osp.basename(tempfile.mktemp()), str(random.random()), repo) index.add([new_file]) @@ -186,7 +187,7 @@ class TestRemote(TestBase): return new_file def _do_test_fetch(self, remote, rw_repo, remote_repo, **kwargs): - # specialized fetch testing to de-clutter the main test + """Specialized fetch testing to de-clutter the main test.""" self._do_test_fetch_info(rw_repo) def fetch_and_test(remote, **kwargs): @@ -202,16 +203,16 @@ class TestRemote(TestBase): def get_info(res, remote, name): return res["%s/%s" % (remote, name)] - # put remote head to master as it is guaranteed to exist + # Put remote head to master as it is guaranteed to exist. remote_repo.head.reference = remote_repo.heads.master res = fetch_and_test(remote, **kwargs) - # all up to date + # All up to date. for info in res: self.assertTrue(info.flags & info.HEAD_UPTODATE) - # rewind remote head to trigger rejection - # index must be false as remote is a bare repo + # Rewind remote head to trigger rejection. + # index must be false as remote is a bare repo. 
rhead = remote_repo.head remote_commit = rhead.commit rhead.reset("HEAD~2", index=False) @@ -221,50 +222,50 @@ class TestRemote(TestBase): self.assertTrue(master_info.flags & FetchInfo.FORCED_UPDATE) self.assertIsNotNone(master_info.note) - # normal fast forward - set head back to previous one + # Normal fast forward - set head back to previous one. rhead.commit = remote_commit res = fetch_and_test(remote) self.assertTrue(res[mkey].flags & FetchInfo.FAST_FORWARD) - # new remote branch + # New remote branch. new_remote_branch = Head.create(remote_repo, "new_branch") res = fetch_and_test(remote) new_branch_info = get_info(res, remote, new_remote_branch) self.assertTrue(new_branch_info.flags & FetchInfo.NEW_HEAD) - # remote branch rename ( causes creation of a new one locally ) + # Remote branch rename (causes creation of a new one locally). new_remote_branch.rename("other_branch_name") res = fetch_and_test(remote) other_branch_info = get_info(res, remote, new_remote_branch) self.assertEqual(other_branch_info.ref.commit, new_branch_info.ref.commit) - # remove new branch + # Remove new branch. Head.delete(new_remote_branch.repo, new_remote_branch) res = fetch_and_test(remote) - # deleted remote will not be fetched + # Deleted remote will not be fetched. self.assertRaises(IndexError, get_info, res, remote, new_remote_branch) - # prune stale tracking branches + # Prune stale tracking branches. stale_refs = remote.stale_refs self.assertEqual(len(stale_refs), 2) self.assertIsInstance(stale_refs[0], RemoteReference) RemoteReference.delete(rw_repo, *stale_refs) - # test single branch fetch with refspec including target remote + # Test single branch fetch with refspec including target remote. res = fetch_and_test(remote, refspec="master:refs/remotes/%s/master" % remote) self.assertEqual(len(res), 1) self.assertTrue(get_info(res, remote, "master")) - # ... with respec and no target + # ...with respec and no target. 
res = fetch_and_test(remote, refspec="master") self.assertEqual(len(res), 1) - # ... multiple refspecs ... works, but git command returns with error if one ref is wrong without - # doing anything. This is new in later binaries + # ...multiple refspecs...works, but git command returns with error if one ref is + # wrong without doing anything. This is new in later binaries. # res = fetch_and_test(remote, refspec=['master', 'fred']) # self.assertEqual(len(res), 1) - # add new tag reference + # Add new tag reference. rtag = TagReference.create(remote_repo, "1.0-RV_hello.there") res = fetch_and_test(remote, tags=True) tinfo = res[str(rtag)] @@ -272,10 +273,10 @@ class TestRemote(TestBase): self.assertEqual(tinfo.ref.commit, rtag.commit) self.assertTrue(tinfo.flags & tinfo.NEW_TAG) - # adjust the local tag commit + # Adjust the local tag commit. Reference.set_object(rtag, rhead.commit.parents[0].parents[0]) - # as of git 2.20 one cannot clobber local tags that have changed without + # As of git 2.20 one cannot clobber local tags that have changed without # specifying --force, and the test assumes you can clobber, so... force = None if rw_repo.git.version_info[:2] >= (2, 20): @@ -285,63 +286,63 @@ class TestRemote(TestBase): self.assertEqual(tinfo.commit, rtag.commit) self.assertTrue(tinfo.flags & tinfo.TAG_UPDATE) - # delete remote tag - local one will stay + # Delete remote tag - local one will stay. TagReference.delete(remote_repo, rtag) res = fetch_and_test(remote, tags=True) self.assertRaises(IndexError, get_info, res, remote, str(rtag)) - # provoke to receive actual objects to see what kind of output we have to - # expect. For that we need a remote transport protocol - # Create a new UN-shared repo and fetch into it after we pushed a change - # to the shared repo + # Provoke to receive actual objects to see what kind of output we have to + # expect. For that we need a remote transport protocol. 
+ # Create a new UN-shared repo and fetch into it after we pushed a change to the + # shared repo. other_repo_dir = tempfile.mktemp("other_repo") - # must clone with a local path for the repo implementation not to freak out - # as it wants local paths only ( which I can understand ) + # Must clone with a local path for the repo implementation not to freak out as + # it wants local paths only (which I can understand). other_repo = remote_repo.clone(other_repo_dir, shared=False) remote_repo_url = osp.basename(remote_repo.git_dir) # git-daemon runs with appropriate `--base-path`. remote_repo_url = Git.polish_url("git://localhost:%s/%s" % (GIT_DAEMON_PORT, remote_repo_url)) - # put origin to git-url + # Put origin to git-url. other_origin = other_repo.remotes.origin with other_origin.config_writer as cw: cw.set("url", remote_repo_url) - # it automatically creates alternates as remote_repo is shared as well. - # It will use the transport though and ignore alternates when fetching + # It automatically creates alternates as remote_repo is shared as well. + # It will use the transport though and ignore alternates when fetching. # assert not other_repo.alternates # this would fail - # assure we are in the right state + # Ensure we are in the right state. rw_repo.head.reset(remote.refs.master, working_tree=True) try: self._commit_random_file(rw_repo) remote.push(rw_repo.head.reference) - # here I would expect to see remote-information about packing - # objects and so on. Unfortunately, this does not happen - # if we are redirecting the output - git explicitly checks for this - # and only provides progress information to ttys + # Here I would expect to see remote-information about packing objects and so + # on. Unfortunately, this does not happen if we are redirecting the output - + # git explicitly checks for this and only provides progress information to + # ttys. 
res = fetch_and_test(other_origin) finally: rmtree(other_repo_dir) # END test and cleanup def _assert_push_and_pull(self, remote, rw_repo, remote_repo): - # push our changes + # Push our changes. lhead = rw_repo.head - # assure we are on master and it is checked out where the remote is + # Ensure we are on master and it is checked out where the remote is. try: lhead.reference = rw_repo.heads.master except AttributeError: - # if the author is on a non-master branch, the clones might not have - # a local master yet. We simply create it + # If the author is on a non-master branch, the clones might not have a local + # master yet. We simply create it. lhead.reference = rw_repo.create_head("master") # END master handling lhead.reset(remote.refs.master, working_tree=True) - # push without spec should fail ( without further configuration ) + # Push without spec should fail (without further configuration) # well, works nicely # self.assertRaises(GitCommandError, remote.push) - # simple file push + # Simple file push. self._commit_random_file(rw_repo) progress = TestRemoteProgress() res = remote.push(lhead.reference, progress) @@ -349,23 +350,23 @@ class TestRemote(TestBase): self._do_test_push_result(res, remote) progress.make_assertion() - # rejected - undo last commit + # Rejected - undo last commit. lhead.reset("HEAD~1") res = remote.push(lhead.reference) self.assertTrue(res[0].flags & PushInfo.ERROR) self.assertTrue(res[0].flags & PushInfo.REJECTED) self._do_test_push_result(res, remote) - # force rejected pull + # Force rejected pull. res = remote.push("+%s" % lhead.reference) self.assertEqual(res[0].flags & PushInfo.ERROR, 0) self.assertTrue(res[0].flags & PushInfo.FORCED_UPDATE) self._do_test_push_result(res, remote) - # invalid refspec + # Invalid refspec. self.assertRaises(GitCommandError, remote.push, "hellothere") - # push new tags + # Push new tags. 
progress = TestRemoteProgress() to_be_updated = "my_tag.1.0RV" new_tag = TagReference.create(rw_repo, to_be_updated) # @UnusedVariable @@ -375,28 +376,28 @@ class TestRemote(TestBase): progress.make_assertion() self._do_test_push_result(res, remote) - # update push new tags - # Rejection is default + # Update push new tags. + # Rejection is default. new_tag = TagReference.create(rw_repo, to_be_updated, reference="HEAD~1", force=True) res = remote.push(tags=True) self._do_test_push_result(res, remote) self.assertTrue(res[-1].flags & PushInfo.REJECTED) self.assertTrue(res[-1].flags & PushInfo.ERROR) - # push force this tag + # Force push this tag. res = remote.push("+%s" % new_tag.path) self.assertEqual(res[-1].flags & PushInfo.ERROR, 0) self.assertTrue(res[-1].flags & PushInfo.FORCED_UPDATE) - # delete tag - have to do it using refspec + # Delete tag - have to do it using refspec. res = remote.push(":%s" % new_tag.path) self._do_test_push_result(res, remote) self.assertTrue(res[0].flags & PushInfo.DELETED) # Currently progress is not properly transferred, especially not using - # the git daemon + # the git daemon. # progress.assert_received_message() - # push new branch + # Push new branch. new_head = Head.create(rw_repo, "my_new_branch") progress = TestRemoteProgress() res = remote.push(new_head, progress) @@ -405,7 +406,7 @@ class TestRemote(TestBase): progress.make_assertion() self._do_test_push_result(res, remote) - # rejected stale delete + # Rejected stale delete. force_with_lease = "%s:0000000000000000000000000000000000000000" % new_head.path res = remote.push(":%s" % new_head.path, force_with_lease=force_with_lease) self.assertTrue(res[0].flags & PushInfo.ERROR) @@ -413,7 +414,7 @@ class TestRemote(TestBase): self.assertIsNone(res[0].local_ref) self._do_test_push_result(res, remote) - # delete new branch on the remote end and locally + # Delete new branch on the remote end and locally. 
res = remote.push(":%s" % new_head.path) self._do_test_push_result(res, remote) Head.delete(rw_repo, new_head) @@ -425,8 +426,8 @@ class TestRemote(TestBase): remote.pull("master", kill_after_timeout=10.0) - # cleanup - delete created tags and branches as we are in an innerloop on - # the same repository + # Cleanup - delete created tags and branches as we are in an inner loop on + # the same repository. TagReference.delete(rw_repo, new_tag, other_tag) remote.push(":%s" % other_tag.path, kill_after_timeout=10.0) @@ -442,7 +443,7 @@ class TestRemote(TestBase): self.assertEqual(remote, remote) self.assertNotEqual(str(remote), repr(remote)) remote_set.add(remote) - remote_set.add(remote) # should already exist + remote_set.add(remote) # Should already exist. # REFS refs = remote.refs self.assertTrue(refs) @@ -452,17 +453,17 @@ class TestRemote(TestBase): # END for each ref # OPTIONS - # cannot use 'fetch' key anymore as it is now a method + # Cannot use 'fetch' key anymore as it is now a method. for opt in ("url",): val = getattr(remote, opt) reader = remote.config_reader assert reader.get(opt) == val assert reader.get_value(opt, None) == val - # unable to write with a reader + # Unable to write with a reader. self.assertRaises(IOError, reader.set, opt, "test") - # change value + # Change value. with remote.config_writer as writer: new_val = "myval" writer.set(opt, new_val) @@ -477,7 +478,7 @@ class TestRemote(TestBase): prev_name = remote.name self.assertEqual(remote.rename(other_name), remote) self.assertNotEqual(prev_name, remote.name) - # multiple times + # Multiple times. 
for _ in range(2): self.assertEqual(remote.rename(prev_name).name, prev_name) # END for each rename ( back to prev_name ) @@ -486,8 +487,8 @@ class TestRemote(TestBase): self._assert_push_and_pull(remote, rw_repo, remote_repo) # FETCH TESTING - # Only for remotes - local cases are the same or less complicated - # as additional progress information will never be emitted + # Only for remotes - local cases are the same or less complicated as + # additional progress information will never be emitted. if remote.name == "daemon_origin": self._do_test_fetch(remote, rw_repo, remote_repo, kill_after_timeout=10.0) ran_fetch_test = True @@ -503,10 +504,11 @@ class TestRemote(TestBase): origin = rw_repo.remote("origin") assert origin == rw_repo.remotes.origin - # Verify we can handle prunes when fetching + # Verify we can handle prunes when fetching. # stderr lines look like this: x [deleted] (none) -> origin/experiment-2012 - # These should just be skipped - # If we don't have a manual checkout, we can't actually assume there are any non-master branches + # These should just be skipped. + # If we don't have a manual checkout, we can't actually assume there are any + # non-master branches. remote_repo.create_head("myone_for_deletion") # Get the branch - to be pruned later origin.fetch() @@ -516,8 +518,7 @@ class TestRemote(TestBase): if branch.name != "master": branch.delete(remote_repo, branch, force=True) num_deleted += 1 - # end - # end for each branch + # END for each branch self.assertGreater(num_deleted, 0) self.assertEqual( len(rw_repo.remotes.origin.fetch(prune=True)), @@ -534,13 +535,13 @@ class TestRemote(TestBase): self.assertIn(remote, bare_rw_repo.remotes) self.assertTrue(remote.exists()) - # create same one again + # Create same one again. 
self.assertRaises(GitCommandError, Remote.create, bare_rw_repo, *arg_list) Remote.remove(bare_rw_repo, new_name) - self.assertTrue(remote.exists()) # We still have a cache that doesn't know we were deleted by name + self.assertTrue(remote.exists()) # We still have a cache that doesn't know we were deleted by name. remote._clear_cache() - assert not remote.exists() # Cache should be renewed now. This is an issue ... + assert not remote.exists() # Cache should be renewed now. This is an issue... for remote in bare_rw_repo.remotes: if remote.name == new_name: @@ -548,11 +549,11 @@ class TestRemote(TestBase): # END if deleted remote matches existing remote's name # END for each remote - # Issue #262 - the next call would fail if bug wasn't fixed + # Issue #262 - the next call would fail if bug wasn't fixed. bare_rw_repo.create_remote("bogus", "/bogus/path", mirror="push") def test_fetch_info(self): - # assure we can handle remote-tracking branches + # Ensure we can handle remote-tracking branches. fetch_info_line_fmt = "c437ee5deb8d00cf02f03720693e4c802e99f390 not-for-merge %s '0.3' of " fetch_info_line_fmt += "git://github.com/gitpython-developers/GitPython" remote_info_line_fmt = "* [new branch] nomatter -> %s" @@ -573,8 +574,8 @@ class TestRemote(TestBase): assert not fi.ref.is_valid() self.assertEqual(fi.ref.name, "local/master") - # handles non-default refspecs: One can specify a different path in refs/remotes - # or a special path just in refs/something for instance + # Handles non-default refspecs: One can specify a different path in refs/remotes + # or a special path just in refs/something for instance. fi = FetchInfo._from_line( self.rorepo, @@ -585,7 +586,7 @@ class TestRemote(TestBase): self.assertIsInstance(fi.ref, TagReference) assert fi.ref.path.startswith("refs/tags"), fi.ref.path - # it could be in a remote direcftory though + # It could be in a remote directory though. 
fi = FetchInfo._from_line( self.rorepo, remote_info_line_fmt % "remotename/tags/tagname", @@ -595,14 +596,14 @@ class TestRemote(TestBase): self.assertIsInstance(fi.ref, TagReference) assert fi.ref.path.startswith("refs/remotes/"), fi.ref.path - # it can also be anywhere ! + # It can also be anywhere! tag_path = "refs/something/remotename/tags/tagname" fi = FetchInfo._from_line(self.rorepo, remote_info_line_fmt % tag_path, fetch_info_line_fmt % "tag") self.assertIsInstance(fi.ref, TagReference) self.assertEqual(fi.ref.path, tag_path) - # branches default to refs/remotes + # Branches default to refs/remotes. fi = FetchInfo._from_line( self.rorepo, remote_info_line_fmt % "remotename/branch", @@ -612,7 +613,7 @@ class TestRemote(TestBase): self.assertIsInstance(fi.ref, RemoteReference) self.assertEqual(fi.ref.remote_name, "remotename") - # but you can force it anywhere, in which case we only have a references + # But you can force it anywhere, in which case we only have a references. fi = FetchInfo._from_line( self.rorepo, remote_info_line_fmt % "refs/something/branch", @@ -639,46 +640,46 @@ class TestRemote(TestBase): @with_rw_repo("HEAD", bare=False) def test_multiple_urls(self, rw_repo): - # test addresses + # Test addresses. test1 = "https://github.com/gitpython-developers/GitPython" test2 = "https://github.com/gitpython-developers/gitdb" test3 = "https://github.com/gitpython-developers/smmap" remote = rw_repo.remotes[0] - # Testing setting a single URL + # Test setting a single URL. remote.set_url(test1) self.assertEqual(list(remote.urls), [test1]) - # Testing replacing that single URL + # Test replacing that single URL. remote.set_url(test1) self.assertEqual(list(remote.urls), [test1]) - # Testing adding new URLs + # Test adding new URLs. remote.set_url(test2, add=True) self.assertEqual(list(remote.urls), [test1, test2]) remote.set_url(test3, add=True) self.assertEqual(list(remote.urls), [test1, test2, test3]) - # Testing removing an URL + # Test removing a URL. 
remote.set_url(test2, delete=True) self.assertEqual(list(remote.urls), [test1, test3]) - # Testing changing an URL + # Test changing a URL. remote.set_url(test2, test3) self.assertEqual(list(remote.urls), [test1, test2]) # will raise: fatal: --add --delete doesn't make sense self.assertRaises(GitCommandError, remote.set_url, test2, add=True, delete=True) - # Testing on another remote, with the add/delete URL + # Test on another remote, with the add/delete URL. remote = rw_repo.create_remote("another", url=test1) remote.add_url(test2) self.assertEqual(list(remote.urls), [test1, test2]) remote.add_url(test3) self.assertEqual(list(remote.urls), [test1, test2, test3]) - # Testing removing all the URLs + # Test removing all the URLs. remote.delete_url(test2) self.assertEqual(list(remote.urls), [test1, test3]) remote.delete_url(test1) self.assertEqual(list(remote.urls), [test3]) - # will raise fatal: Will not delete all non-push URLs + # Will raise fatal: Will not delete all non-push URLs. self.assertRaises(GitCommandError, remote.delete_url, test3) def test_fetch_error(self): @@ -766,6 +767,11 @@ class TestRemote(TestBase): Remote.create(rw_repo, "origin", url) assert not tmp_file.exists() + @pytest.mark.xfail( + sys.platform == "win32", + reason=R"Multiple '\' instead of '/' in remote.url make it differ from expected value", + raises=AssertionError, + ) @with_rw_repo("HEAD") def test_create_remote_unsafe_url_allowed(self, rw_repo): with tempfile.TemporaryDirectory() as tdir: @@ -806,8 +812,8 @@ class TestRemote(TestBase): "fd::17/foo", ] for url in urls: - # The URL will be allowed into the command, but the command will - # fail since we don't have that protocol enabled in the Git config file. + # The URL will be allowed into the command, but the command will fail + # since we don't have that protocol enabled in the Git config file. 
with self.assertRaises(GitCommandError): remote.fetch(url, allow_unsafe_protocols=True) assert not tmp_file.exists() @@ -824,6 +830,15 @@ class TestRemote(TestBase): remote.fetch(**unsafe_option) assert not tmp_file.exists() + @pytest.mark.xfail( + sys.platform == "win32", + reason=( + "File not created. A separate Windows command may be needed. This and the " + "currently passing test test_fetch_unsafe_options must be adjusted in the " + "same way. Until then, test_fetch_unsafe_options is unreliable on Windows." + ), + raises=AssertionError, + ) @with_rw_repo("HEAD") def test_fetch_unsafe_options_allowed(self, rw_repo): with tempfile.TemporaryDirectory() as tdir: @@ -865,8 +880,8 @@ class TestRemote(TestBase): "fd::17/foo", ] for url in urls: - # The URL will be allowed into the command, but the command will - # fail since we don't have that protocol enabled in the Git config file. + # The URL will be allowed into the command, but the command will fail + # since we don't have that protocol enabled in the Git config file. with self.assertRaises(GitCommandError): remote.pull(url, allow_unsafe_protocols=True) assert not tmp_file.exists() @@ -883,6 +898,15 @@ class TestRemote(TestBase): remote.pull(**unsafe_option) assert not tmp_file.exists() + @pytest.mark.xfail( + sys.platform == "win32", + reason=( + "File not created. A separate Windows command may be needed. This and the " + "currently passing test test_pull_unsafe_options must be adjusted in the " + "same way. Until then, test_pull_unsafe_options is unreliable on Windows." + ), + raises=AssertionError, + ) @with_rw_repo("HEAD") def test_pull_unsafe_options_allowed(self, rw_repo): with tempfile.TemporaryDirectory() as tdir: @@ -924,8 +948,8 @@ class TestRemote(TestBase): "fd::17/foo", ] for url in urls: - # The URL will be allowed into the command, but the command will - # fail since we don't have that protocol enabled in the Git config file. 
+ # The URL will be allowed into the command, but the command will fail + # since we don't have that protocol enabled in the Git config file. with self.assertRaises(GitCommandError): remote.push(url, allow_unsafe_protocols=True) assert not tmp_file.exists() @@ -948,6 +972,15 @@ class TestRemote(TestBase): remote.push(**unsafe_option) assert not tmp_file.exists() + @pytest.mark.xfail( + sys.platform == "win32", + reason=( + "File not created. A separate Windows command may be needed. This and the " + "currently passing test test_push_unsafe_options must be adjusted in the " + "same way. Until then, test_push_unsafe_options is unreliable on Windows." + ), + raises=AssertionError, + ) @with_rw_repo("HEAD") def test_push_unsafe_options_allowed(self, rw_repo): with tempfile.TemporaryDirectory() as tdir: @@ -968,18 +1001,33 @@ class TestRemote(TestBase): assert tmp_file.exists() tmp_file.unlink() + @with_rw_and_rw_remote_repo("0.1.6") + def test_fetch_unsafe_branch_name(self, rw_repo, remote_repo): + # Create branch with a name containing a NBSP + bad_branch_name = f"branch_with_{chr(160)}_nbsp" + Head.create(remote_repo, bad_branch_name) + + # Fetch and get branches + remote = rw_repo.remote("origin") + branches = remote.fetch() + + # Test for truncated branch name in branches + assert f"origin/{bad_branch_name}" in [b.name for b in branches] + + # Cleanup branch + Head.delete(remote_repo, bad_branch_name) + class TestTimeouts(TestBase): @with_rw_repo("HEAD", bare=False) def test_timeout_funcs(self, repo): - # Force error code to prevent a race condition if the python thread is - # slow + # Force error code to prevent a race condition if the python thread is slow. default = Git.AutoInterrupt._status_code_if_terminate Git.AutoInterrupt._status_code_if_terminate = -15 - for function in ["pull", "fetch"]: # can't get push to timeout + for function in ["pull", "fetch"]: # Can't get push to time out. 
f = getattr(repo.remotes.origin, function) - assert f is not None # Make sure these functions exist - _ = f() # Make sure the function runs + assert f is not None # Make sure these functions exist. + _ = f() # Make sure the function runs. with pytest.raises(GitCommandError, match="kill_after_timeout=0 s"): f(kill_after_timeout=0) diff --git a/test/test_repo.py b/test/test_repo.py index 15899ec..e38da5b 100644 --- a/test/test_repo.py +++ b/test/test_repo.py @@ -1,52 +1,46 @@ -# -*- coding: utf-8 -*- -# test_repo.py # Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors # -# This module is part of GitPython and is released under -# the BSD License: https://opensource.org/license/bsd-3-clause/ +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ + +import gc import glob import io from io import BytesIO import itertools import os +import os.path as osp import pathlib import pickle import sys import tempfile -from unittest import mock, skipIf, SkipTest, skip +from unittest import mock, skip import pytest from git import ( + BadName, + Commit, + Git, + GitCmdObjectDB, + GitCommandError, + GitDB, + Head, + IndexFile, InvalidGitRepositoryError, - Repo, NoSuchPathError, - Head, - Commit, Object, - Tree, - IndexFile, - Git, Reference, - GitDB, - Submodule, - GitCmdObjectDB, Remote, - BadName, - GitCommandError, -) -from git.exc import ( - BadObject, - UnsafeOptionError, - UnsafeProtocolError, + Repo, + Submodule, + Tree, ) +from git.exc import BadObject, UnsafeOptionError, UnsafeProtocolError from git.repo.fun import touch -from test.lib import TestBase, with_rw_repo, fixture -from git.util import HIDE_WINDOWS_KNOWN_ERRORS, cygpath -from test.lib import with_rw_directory -from git.util import join_path_native, rmtree, rmfile, bin_to_hex +from git.util import bin_to_hex, cwd, cygpath, join_path_native, rmfile, rmtree -import os.path as osp +from test.lib import TestBase, 
fixture, with_rw_directory, with_rw_repo def iter_flatten(lol): @@ -75,15 +69,28 @@ class TestRepo(TestBase): for lfp in glob.glob(_tc_lock_fpaths): if osp.isfile(lfp): raise AssertionError("Previous TC left hanging git-lock file: {}".format(lfp)) - import gc gc.collect() def test_new_should_raise_on_invalid_repo_location(self): - self.assertRaises(InvalidGitRepositoryError, Repo, tempfile.gettempdir()) + # Ideally this tests a directory that is outside of any repository. In the rare + # case tempfile.gettempdir() is inside a repo, this still passes, but tests the + # same scenario as test_new_should_raise_on_invalid_repo_location_within_repo. + with tempfile.TemporaryDirectory() as tdir: + self.assertRaises(InvalidGitRepositoryError, Repo, tdir) + + @with_rw_directory + def test_new_should_raise_on_invalid_repo_location_within_repo(self, rw_dir): + repo_dir = osp.join(rw_dir, "repo") + Repo.init(repo_dir) + subdir = osp.join(repo_dir, "subdir") + os.mkdir(subdir) + self.assertRaises(InvalidGitRepositoryError, Repo, subdir) def test_new_should_raise_on_non_existent_path(self): - self.assertRaises(NoSuchPathError, Repo, "repos/foobar") + with tempfile.TemporaryDirectory() as tdir: + nonexistent = osp.join(tdir, "foobar") + self.assertRaises(NoSuchPathError, Repo, nonexistent) @with_rw_repo("0.3.2.1") def test_repo_creation_from_different_paths(self, rw_repo): @@ -122,7 +129,7 @@ class TestRepo(TestBase): self.assertEqual(tree.type, "tree") self.assertEqual(self.rorepo.tree(tree), tree) - # try from invalid revision that does not exist + # Try from an invalid revision that does not exist. self.assertRaises(BadName, self.rorepo.tree, "hello world") def test_pickleable(self): @@ -165,28 +172,29 @@ class TestRepo(TestBase): self.assertEqual(num_trees, mc) def _assert_empty_repo(self, repo): - # test all kinds of things with an empty, freshly initialized repo. - # It should throw good errors + """Test all kinds of things with an empty, freshly initialized repo. 
- # entries should be empty + It should throw good errors. + """ + # Entries should be empty. self.assertEqual(len(repo.index.entries), 0) - # head is accessible + # head is accessible. assert repo.head assert repo.head.ref assert not repo.head.is_valid() - # we can change the head to some other ref + # We can change the head to some other ref. head_ref = Head.from_path(repo, Head.to_full_path("some_head")) assert not head_ref.is_valid() repo.head.ref = head_ref - # is_dirty can handle all kwargs + # is_dirty can handle all kwargs. for args in ((1, 0, 0), (0, 1, 0), (0, 0, 1)): assert not repo.is_dirty(*args) # END for each arg - # we can add a file to the index ( if we are not bare ) + # We can add a file to the index (if we are not bare). if not repo.bare: pass # END test repos with working tree @@ -203,7 +211,7 @@ class TestRepo(TestBase): @with_rw_directory def test_date_format(self, rw_dir): repo = Repo.init(osp.join(rw_dir, "repo")) - # @-timestamp is the format used by git commit hooks + # @-timestamp is the format used by git commit hooks. repo.index.commit("Commit messages", commit_date="@1400000000 +0000") @with_rw_directory @@ -264,7 +272,7 @@ class TestRepo(TestBase): ) except GitCommandError as err: assert password not in str(err), "The error message '%s' should not contain the password" % err - # Working example from a blank private project + # Working example from a blank private project. Repo.clone_from( url="https://gitlab+deploy-token-392045:mLWhVus7bjLsy8xj8q2V@gitlab.com/mercierm/test_git_python", to_path=rw_dir, @@ -297,6 +305,15 @@ class TestRepo(TestBase): rw_repo.clone(tmp_dir, **unsafe_option) assert not tmp_file.exists() + @pytest.mark.xfail( + sys.platform == "win32", + reason=( + "File not created. A separate Windows command may be needed. This and the " + "currently passing test test_clone_unsafe_options must be adjusted in the " + "same way. Until then, test_clone_unsafe_options is unreliable on Windows." 
+ ), + raises=AssertionError, + ) @with_rw_repo("HEAD") def test_clone_unsafe_options_allowed(self, rw_repo): with tempfile.TemporaryDirectory() as tdir: @@ -367,6 +384,15 @@ class TestRepo(TestBase): Repo.clone_from(rw_repo.working_dir, tmp_dir, **unsafe_option) assert not tmp_file.exists() + @pytest.mark.xfail( + sys.platform == "win32", + reason=( + "File not created. A separate Windows command may be needed. This and the " + "currently passing test test_clone_from_unsafe_options must be adjusted in the " + "same way. Until then, test_clone_from_unsafe_options is unreliable on Windows." + ), + raises=AssertionError, + ) @with_rw_repo("HEAD") def test_clone_from_unsafe_options_allowed(self, rw_repo): with tempfile.TemporaryDirectory() as tdir: @@ -492,13 +518,11 @@ class TestRepo(TestBase): repo.git.log(n=100, output_stream=TestOutputStream(io.DEFAULT_BUFFER_SIZE)) def test_init(self): - prev_cwd = os.getcwd() - os.chdir(tempfile.gettempdir()) - git_dir_rela = "repos/foo/bar.git" - del_dir_abs = osp.abspath("repos") - git_dir_abs = osp.abspath(git_dir_rela) - try: - # with specific path + with tempfile.TemporaryDirectory() as tdir, cwd(tdir): + git_dir_rela = "repos/foo/bar.git" + git_dir_abs = osp.abspath(git_dir_rela) + + # With specific path for path in (git_dir_rela, git_dir_abs): r = Repo.init(path=path, bare=True) self.assertIsInstance(r, Repo) @@ -508,7 +532,7 @@ class TestRepo(TestBase): self._assert_empty_repo(r) - # test clone + # Test clone clone_path = path + "_clone" rc = r.clone(clone_path) self._assert_empty_repo(rc) @@ -516,12 +540,12 @@ class TestRepo(TestBase): try: rmtree(clone_path) except OSError: - # when relative paths are used, the clone may actually be inside - # of the parent directory + # When relative paths are used, the clone may actually be inside of + # the parent directory. pass # END exception handling - # try again, this time with the absolute version + # Try again, this time with the absolute version. 
rc = Repo.clone_from(r.git_dir, clone_path) self._assert_empty_repo(rc) @@ -529,8 +553,8 @@ class TestRepo(TestBase): try: rmtree(clone_path) except OSError: - # when relative paths are used, the clone may actually be inside - # of the parent directory + # When relative paths are used, the clone may actually be inside of + # the parent directory. pass # END exception handling @@ -543,13 +567,6 @@ class TestRepo(TestBase): assert not r.has_separate_working_tree() self._assert_empty_repo(r) - finally: - try: - rmtree(del_dir_abs) - except OSError: - pass - os.chdir(prev_cwd) - # END restore previous state def test_bare_property(self): self.rorepo.bare @@ -647,11 +664,10 @@ class TestRepo(TestBase): self.assertEqual(value_errors, []) def test_archive(self): - tmpfile = tempfile.mktemp(suffix="archive-test") - with open(tmpfile, "wb") as stream: + with tempfile.NamedTemporaryFile("wb", suffix="archive-test", delete=False) as stream: self.rorepo.archive(stream, "0.1.6", path="doc") assert stream.tell() - os.remove(tmpfile) + os.remove(stream.name) # Do it this way so we can inspect the file on failure. @mock.patch.object(Git, "_call_process") def test_should_display_blame_information(self, git): @@ -677,11 +693,11 @@ class TestRepo(TestBase): lambda: c.message, ) - # test the 'lines per commit' entries + # Test the 'lines per commit' entries. tlist = b[0][1] self.assertTrue(tlist) self.assertTrue(isinstance(tlist[0], str)) - self.assertTrue(len(tlist) < sum(len(t) for t in tlist)) # test for single-char bug + self.assertTrue(len(tlist) < sum(len(t) for t in tlist)) # Test for single-char bug. # BINARY BLAME git.return_value = fixture("blame_binary") @@ -690,7 +706,7 @@ class TestRepo(TestBase): def test_blame_real(self): c = 0 - nml = 0 # amount of multi-lines per blame + nml = 0 # Amount of multi-lines per blame. 
for item in self.rorepo.head.commit.tree.traverse( predicate=lambda i, d: i.type == "blob" and i.path.endswith(".py") ): @@ -704,14 +720,14 @@ class TestRepo(TestBase): @mock.patch.object(Git, "_call_process") def test_blame_incremental(self, git): - # loop over two fixtures, create a test fixture for 2.11.1+ syntax + # Loop over two fixtures, create a test fixture for 2.11.1+ syntax. for git_fixture in ("blame_incremental", "blame_incremental_2.11.1_plus"): git.return_value = fixture(git_fixture) blame_output = self.rorepo.blame_incremental("9debf6b0aafb6f7781ea9d1383c86939a1aacde3", "AUTHORS") blame_output = list(blame_output) self.assertEqual(len(blame_output), 5) - # Check all outputted line numbers + # Check all outputted line numbers. ranges = flatten([entry.linenos for entry in blame_output]) self.assertEqual( ranges, @@ -729,13 +745,13 @@ class TestRepo(TestBase): commits = [entry.commit.hexsha[:7] for entry in blame_output] self.assertEqual(commits, ["82b8902", "82b8902", "c76852d", "c76852d", "c76852d"]) - # Original filenames + # Original filenames. self.assertSequenceEqual( [entry.orig_path for entry in blame_output], ["AUTHORS"] * len(blame_output), ) - # Original line numbers + # Original line numbers. 
orig_ranges = flatten([entry.orig_linenos for entry in blame_output]) self.assertEqual( orig_ranges, @@ -748,7 +764,7 @@ class TestRepo(TestBase): range(13, 15), ] ), - ) # noqa E501 + ) @mock.patch.object(Git, "_call_process") def test_blame_complex_revision(self, git): @@ -764,16 +780,6 @@ class TestRepo(TestBase): self.rorepo.blame("HEAD", "README.md", rev_opts=["-M", "-C", "-C"]) git.assert_called_once_with(*expected_args, **boilerplate_kwargs) - @skipIf( - HIDE_WINDOWS_KNOWN_ERRORS and Git.is_cygwin(), - """FIXME: File "C:\\projects\\gitpython\\git\\cmd.py", line 671, in execute - raise GitCommandError(command, status, stderr_value, stdout_value) - GitCommandError: Cmd('git') failed due to: exit code(128) - cmdline: git add 1__��ava verb��ten 1_test _myfile 1_test_other_file - 1_��ava-----verb��ten - stderr: 'fatal: pathspec '"1__çava verböten"' did not match any files' - """, - ) @with_rw_repo("HEAD", bare=False) def test_untracked_files(self, rwrepo): for run, repo_add in enumerate((rwrepo.index.add, rwrepo.git.add)): @@ -792,7 +798,7 @@ class TestRepo(TestBase): untracked_files = rwrepo.untracked_files num_recently_untracked = len(untracked_files) - # assure we have all names - they are relative to the git-dir + # Ensure we have all names - they are relative to the git-dir. num_test_untracked = 0 for utfile in untracked_files: num_test_untracked += join_path_native(base, utfile) in files @@ -800,12 +806,12 @@ class TestRepo(TestBase): repo_add(untracked_files) self.assertEqual(len(rwrepo.untracked_files), (num_recently_untracked - len(files))) - # end for each run + # END for each run def test_config_reader(self): - reader = self.rorepo.config_reader() # all config files + reader = self.rorepo.config_reader() # All config files. assert reader.read_only - reader = self.rorepo.config_reader("repository") # single config file + reader = self.rorepo.config_reader("repository") # Single config file. 
assert reader.read_only def test_config_writer(self): @@ -814,8 +820,8 @@ class TestRepo(TestBase): with self.rorepo.config_writer(config_level) as writer: self.assertFalse(writer.read_only) except IOError: - # its okay not to get a writer for some configuration files if we - # have no permissions + # It's okay not to get a writer for some configuration files if we + # have no permissions. pass def test_config_level_paths(self): @@ -823,8 +829,8 @@ class TestRepo(TestBase): assert self.rorepo._get_config_path(config_level) def test_creation_deletion(self): - # just a very quick test to assure it generally works. There are - # specialized cases in the test_refs module + # Just a very quick test to assure it generally works. There are specialized + # cases in the test_refs module. head = self.rorepo.create_head("new_head", "HEAD~1") self.rorepo.delete_head(head) @@ -840,7 +846,7 @@ class TestRepo(TestBase): self.rorepo.delete_remote(remote) def test_comparison_and_hash(self): - # this is only a preliminary test, more testing done in test_index + # This is only a preliminary test, more testing done in test_index. self.assertEqual(self.rorepo, self.rorepo) self.assertFalse(self.rorepo != self.rorepo) self.assertEqual(len({self.rorepo, self.rorepo}), 1) @@ -856,8 +862,8 @@ class TestRepo(TestBase): Repo.init(osp.join("$FOO", "test.git"), bare=True) def test_git_cmd(self): - # test CatFileContentStream, just to be very sure we have no fencepost errors - # last \n is the terminating newline that it expects + # Test CatFileContentStream, just to be very sure we have no fencepost errors. + # The last \n is the terminating newline that it expects. l1 = b"0123456789\n" l2 = b"abcdefghijklmnopqrstxy\n" l3 = b"z\n" @@ -865,8 +871,8 @@ class TestRepo(TestBase): l1p = l1[:5] - # full size - # size is without terminating newline + # Full size. + # Size is without terminating newline. 
def mkfull(): return Git.CatFileContentStream(len(d) - 1, BytesIO(d)) @@ -880,7 +886,7 @@ class TestRepo(TestBase): lines = s.readlines() self.assertEqual(len(lines), 3) self.assertTrue(lines[-1].endswith(b"\n"), lines[-1]) - self.assertEqual(s._stream.tell(), len(d)) # must have scrubbed to the end + self.assertEqual(s._stream.tell(), len(d)) # Must have scrubbed to the end. # realines line limit s = mkfull() @@ -923,7 +929,7 @@ class TestRepo(TestBase): s = mkfull() self.assertEqual(s.read(5), l1p) self.assertEqual(s.read(6), l1[5:]) - self.assertEqual(s._stream.tell(), 5 + 6) # its not yet done + self.assertEqual(s._stream.tell(), 5 + 6) # It's not yet done. # read tiny s = mktiny() @@ -938,7 +944,7 @@ class TestRepo(TestBase): if rev_obj.type == "tag": rev_obj = rev_obj.object - # tree and blob type + # Tree and blob type. obj = rev_parse(name + "^{tree}") self.assertEqual(obj, rev_obj.tree) @@ -958,13 +964,13 @@ class TestRepo(TestBase): obj = orig_obj # END deref tags by default - # try history + # Try history rev = name + "~" obj2 = rev_parse(rev) self.assertEqual(obj2, obj.parents[0]) self._assert_rev_parse_types(rev, obj2) - # history with number + # History with number ni = 11 history = [obj.parents[0]] for _ in range(ni): @@ -978,13 +984,13 @@ class TestRepo(TestBase): self._assert_rev_parse_types(rev, obj2) # END history check - # parent ( default ) + # Parent (default) rev = name + "^" obj2 = rev_parse(rev) self.assertEqual(obj2, obj.parents[0]) self._assert_rev_parse_types(rev, obj2) - # parent with number + # Parent with number for pn, parent in enumerate(obj.parents): rev = name + "^%i" % (pn + 1) self.assertEqual(rev_parse(rev), parent) @@ -995,17 +1001,17 @@ class TestRepo(TestBase): @with_rw_repo("HEAD", bare=False) def test_rw_rev_parse(self, rwrepo): - # verify it does not confuse branches with hexsha ids + # Verify it does not confuse branches with hexsha ids. 
ahead = rwrepo.create_head("aaaaaaaa") assert rwrepo.rev_parse(str(ahead)) == ahead.commit def test_rev_parse(self): rev_parse = self.rorepo.rev_parse - # try special case: This one failed at some point, make sure its fixed + # Try special case: This one failed at some point, make sure its fixed. self.assertEqual(rev_parse("33ebe").hexsha, "33ebe7acec14b25c5f84f35a664803fcab2f7781") - # start from reference + # Start from reference. num_resolved = 0 for ref_no, ref in enumerate(Reference.iter_items(self.rorepo)): @@ -1018,7 +1024,8 @@ class TestRepo(TestBase): num_resolved += 1 except (BadName, BadObject): print("failed on %s" % path_section) - # is fine, in case we have something like 112, which belongs to remotes/rname/merge-requests/112 + # This is fine if we have something like 112, which belongs to + # remotes/rname/merge-requests/112. # END exception handling # END for each token if ref_no == 3 - 1: @@ -1026,21 +1033,21 @@ class TestRepo(TestBase): # END for each reference assert num_resolved - # it works with tags ! + # It works with tags! tag = self._assert_rev_parse("0.1.4") self.assertEqual(tag.type, "tag") - # try full sha directly ( including type conversion ) + # try full sha directly (including type conversion). self.assertEqual(tag.object, rev_parse(tag.object.hexsha)) self._assert_rev_parse_types(tag.object.hexsha, tag.object) - # multiple tree types result in the same tree: HEAD^{tree}^{tree}:CHANGES + # Multiple tree types result in the same tree: HEAD^{tree}^{tree}:CHANGES rev = "0.1.4^{tree}^{tree}" self.assertEqual(rev_parse(rev), tag.object.tree) self.assertEqual(rev_parse(rev + ":CHANGES"), tag.object.tree["CHANGES"]) - # try to get parents from first revision - it should fail as no such revision - # exists + # Try to get parents from first revision - it should fail as no such revision + # exists. 
first_rev = "33ebe7acec14b25c5f84f35a664803fcab2f7781" commit = rev_parse(first_rev) self.assertEqual(len(commit.parents), 0) @@ -1048,14 +1055,14 @@ class TestRepo(TestBase): self.assertRaises(BadName, rev_parse, first_rev + "~") self.assertRaises(BadName, rev_parse, first_rev + "^") - # short SHA1 + # Short SHA1. commit2 = rev_parse(first_rev[:20]) self.assertEqual(commit2, commit) commit2 = rev_parse(first_rev[:5]) self.assertEqual(commit2, commit) - # todo: dereference tag into a blob 0.1.7^{blob} - quite a special one - # needs a tag which points to a blob + # TODO: Dereference tag into a blob 0.1.7^{blob} - quite a special one. + # Needs a tag which points to a blob. # ref^0 returns commit being pointed to, same with ref~0, and ^{} tag = rev_parse("0.1.4") @@ -1063,7 +1070,7 @@ class TestRepo(TestBase): self.assertEqual(tag.object, rev_parse("0.1.4%s" % token)) # END handle multiple tokens - # try partial parsing + # Try partial parsing. max_items = 40 for i, binsha in enumerate(self.rorepo.odb.sha_iter()): self.assertEqual( @@ -1071,41 +1078,41 @@ class TestRepo(TestBase): binsha, ) if i > max_items: - # this is rather slow currently, as rev_parse returns an object - # which requires accessing packs, it has some additional overhead + # This is rather slow currently, as rev_parse returns an object that + # requires accessing packs, so it has some additional overhead. break # END for each binsha in repo - # missing closing brace commit^{tree + # Missing closing brace: commit^{tree self.assertRaises(ValueError, rev_parse, "0.1.4^{tree") - # missing starting brace + # Missing starting brace. self.assertRaises(ValueError, rev_parse, "0.1.4^tree}") # REVLOG ####### head = self.rorepo.head - # need to specify a ref when using the @ syntax + # Need to specify a ref when using the @ syntax. self.assertRaises(BadObject, rev_parse, "%s@{0}" % head.commit.hexsha) - # uses HEAD.ref by default + # Uses HEAD.ref by default. 
self.assertEqual(rev_parse("@{0}"), head.commit) if not head.is_detached: refspec = "%s@{0}" % head.ref.name self.assertEqual(rev_parse(refspec), head.ref.commit) - # all additional specs work as well + # All additional specs work as well. self.assertEqual(rev_parse(refspec + "^{tree}"), head.commit.tree) self.assertEqual(rev_parse(refspec + ":CHANGES").type, "blob") # END operate on non-detached head - # position doesn't exist + # Position doesn't exist. self.assertRaises(IndexError, rev_parse, "@{10000}") - # currently, nothing more is supported + # Currently, nothing more is supported. self.assertRaises(NotImplementedError, rev_parse, "@{1 week ago}") - # the last position + # The last position. assert rev_parse("@{1}") != head.commit def test_repo_odbtype(self): @@ -1126,12 +1133,12 @@ class TestRepo(TestBase): @with_rw_repo("HEAD", bare=False) def test_submodule_update(self, rwrepo): - # fails in bare mode + # Fails in bare mode. rwrepo._bare = True self.assertRaises(InvalidGitRepositoryError, rwrepo.submodule_update) rwrepo._bare = False - # test create submodule + # Test submodule creation. sm = rwrepo.submodules[0] sm = rwrepo.create_submodule( "my_new_sub", @@ -1140,7 +1147,7 @@ class TestRepo(TestBase): ) self.assertIsInstance(sm, Submodule) - # note: the rest of this functionality is tested in test_submodule + # NOTE: The rest of this functionality is tested in test_submodule. 
@with_rw_repo("HEAD") def test_git_file(self, rwrepo): @@ -1166,17 +1173,18 @@ class TestRepo(TestBase): commit = next(repo.iter_commits(rev, path, max_count=1)) commit.tree[path] - # This is based on this comment + # This is based on this comment: # https://github.com/gitpython-developers/GitPython/issues/60#issuecomment-23558741 - # And we expect to set max handles to a low value, like 64 - # You should set ulimit -n X, see .travis.yml - # The loops below would easily create 500 handles if these would leak (4 pipes + multiple mapped files) + # And we expect to set max handles to a low value, like 64. + # You should set ulimit -n X. See .travis.yml. + # The loops below would easily create 500 handles if these would leak + # (4 pipes + multiple mapped files). for _ in range(64): for repo_type in (GitCmdObjectDB, GitDB): repo = Repo(self.rorepo.working_tree_dir, odbt=repo_type) last_commit(repo, "master", "test/test_base.py") - # end for each repository type - # end for each iteration + # END for each repository type + # END for each iteration def test_remote_method(self): self.assertRaises(ValueError, self.rorepo.remote, "foo-blue") @@ -1186,13 +1194,13 @@ class TestRepo(TestBase): def test_empty_repo(self, rw_dir): """Assure we can handle empty repositories""" r = Repo.init(rw_dir, mkdir=False) - # It's ok not to be able to iterate a commit, as there is none + # It's ok not to be able to iterate a commit, as there is none. self.assertRaises(ValueError, r.iter_commits) self.assertEqual(r.active_branch.name, "master") assert not r.active_branch.is_valid(), "Branch is yet to be born" - # actually, when trying to create a new branch without a commit, git itself fails - # We should, however, not fail ungracefully + # Actually, when trying to create a new branch without a commit, git itself + # fails. We should, however, not fail ungracefully. 
self.assertRaises(BadName, r.create_head, "foo") self.assertRaises(BadName, r.create_head, "master") # It's expected to not be able to access a tree @@ -1203,7 +1211,7 @@ class TestRepo(TestBase): r.index.add([new_file_path]) r.index.commit("initial commit\nBAD MESSAGE 1\n") - # Now a branch should be creatable + # Now a branch should be creatable. nb = r.create_head("foo") assert nb.is_valid() @@ -1226,7 +1234,7 @@ class TestRepo(TestBase): self.assertRaises(ValueError, repo.merge_base) self.assertRaises(ValueError, repo.merge_base, "foo") - # two commit merge-base + # Two commit merge-base. res = repo.merge_base(c1, c2) self.assertIsInstance(res, list) self.assertEqual(len(res), 1) @@ -1237,15 +1245,15 @@ class TestRepo(TestBase): res = repo.merge_base(c1, c2, c3, **{kw: True}) self.assertIsInstance(res, list) self.assertEqual(len(res), 1) - # end for each keyword signalling all merge-bases to be returned + # END for each keyword signalling all merge-bases to be returned - # Test for no merge base - can't do as we have + # Test for no merge base - can't do as we have. self.assertRaises(GitCommandError, repo.merge_base, c1, "ffffff") def test_is_ancestor(self): git = self.rorepo.git if git.version_info[:3] < (1, 8, 0): - raise SkipTest("git merge-base --is-ancestor feature unsupported") + raise RuntimeError("git merge-base --is-ancestor feature unsupported (test needs git 1.8.0 or later)") repo = self.rorepo c1 = "f6aa8d1" @@ -1266,22 +1274,22 @@ class TestRepo(TestBase): tree_sha = "960b40fe36" tag_sha = "42c2f60c43" - # Check for valid objects + # Check for valid objects. self.assertTrue(repo.is_valid_object(commit_sha)) self.assertTrue(repo.is_valid_object(blob_sha)) self.assertTrue(repo.is_valid_object(tree_sha)) self.assertTrue(repo.is_valid_object(tag_sha)) - # Check for valid objects of specific type + # Check for valid objects of specific type. 
self.assertTrue(repo.is_valid_object(commit_sha, "commit")) self.assertTrue(repo.is_valid_object(blob_sha, "blob")) self.assertTrue(repo.is_valid_object(tree_sha, "tree")) self.assertTrue(repo.is_valid_object(tag_sha, "tag")) - # Check for invalid objects + # Check for invalid objects. self.assertFalse(repo.is_valid_object(b"1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a1a", "blob")) - # Check for invalid objects of specific type + # Check for invalid objects of specific type. self.assertFalse(repo.is_valid_object(commit_sha, "blob")) self.assertFalse(repo.is_valid_object(blob_sha, "commit")) self.assertFalse(repo.is_valid_object(tree_sha, "commit")) @@ -1293,7 +1301,7 @@ class TestRepo(TestBase): based on it.""" git = Git(rw_dir) if git.version_info[:3] < (2, 5, 1): - raise SkipTest("worktree feature unsupported") + raise RuntimeError("worktree feature unsupported (test needs git 2.5.1 or later)") rw_master = self.rorepo.clone(join_path_native(rw_dir, "master_repo")) branch = rw_master.create_head("aaaaaaaa") @@ -1302,17 +1310,17 @@ class TestRepo(TestBase): worktree_path = cygpath(worktree_path) rw_master.git.worktree("add", worktree_path, branch.name) - # this ensures that we can read the repo's gitdir correctly + # This ensures that we can read the repo's gitdir correctly. repo = Repo(worktree_path) self.assertIsInstance(repo, Repo) - # this ensures we're able to actually read the refs in the tree, which - # means we can read commondir correctly. + # This ensures we're able to actually read the refs in the tree, which means we + # can read commondir correctly. commit = repo.head.commit self.assertIsInstance(commit, Object) - # this ensures we can read the remotes, which confirms we're reading - # the config correctly. + # This ensures we can read the remotes, which confirms we're reading the config + # correctly. 
origin = repo.remotes.origin self.assertIsInstance(origin, Remote) @@ -1320,11 +1328,11 @@ class TestRepo(TestBase): @with_rw_directory def test_git_work_tree_env(self, rw_dir): - """Check that we yield to GIT_WORK_TREE""" - # clone a repo - # move .git directory to a subdirectory - # set GIT_DIR and GIT_WORK_TREE appropriately - # check that repo.working_tree_dir == rw_dir + """Check that we yield to GIT_WORK_TREE.""" + # Clone a repo. + # Move .git directory to a subdirectory. + # Set GIT_DIR and GIT_WORK_TREE appropriately. + # Check that: repo.working_tree_dir == rw_dir self.rorepo.clone(join_path_native(rw_dir, "master_repo")) @@ -1377,6 +1385,11 @@ class TestRepo(TestBase): r.git.commit(message="init") self.assertEqual(r.git.show("HEAD:hello.txt", strip_newline_in_stdout=False), "hello\n") + @pytest.mark.xfail( + sys.platform == "win32", + reason=R"fatal: could not create leading directories of '--upload-pack=touch C:\Users\ek\AppData\Local\Temp\tmpnantqizc\pwn': Invalid argument", # noqa: E501 + raises=GitCommandError, + ) @with_rw_repo("HEAD") def test_clone_command_injection(self, rw_repo): with tempfile.TemporaryDirectory() as tdir: @@ -1388,7 +1401,7 @@ class TestRepo(TestBase): rw_repo.clone(payload) assert not unexpected_file.exists() - # A repo was cloned with the payload as name + # A repo was cloned with the payload as name. 
assert pathlib.Path(payload).exists() @with_rw_repo("HEAD") diff --git a/test/test_stats.py b/test/test_stats.py index 335ce48..91d2cf6 100644 --- a/test/test_stats.py +++ b/test/test_stats.py @@ -1,26 +1,32 @@ -# test_stats.py # Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors # -# This module is part of GitPython and is released under -# the BSD License: https://opensource.org/license/bsd-3-clause/ +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ -from test.lib import TestBase, fixture from git import Stats from git.compat import defenc +from test.lib import TestBase, fixture + class TestStats(TestBase): def test_list_from_string(self): output = fixture("diff_numstat").decode(defenc) stats = Stats._list_from_string(self.rorepo, output) - self.assertEqual(2, stats.total["files"]) - self.assertEqual(52, stats.total["lines"]) - self.assertEqual(29, stats.total["insertions"]) + self.assertEqual(3, stats.total["files"]) + self.assertEqual(59, stats.total["lines"]) + self.assertEqual(36, stats.total["insertions"]) self.assertEqual(23, stats.total["deletions"]) self.assertEqual(29, stats.files["a.txt"]["insertions"]) self.assertEqual(18, stats.files["a.txt"]["deletions"]) + self.assertEqual("M", stats.files["a.txt"]["change_type"]) self.assertEqual(0, stats.files["b.txt"]["insertions"]) self.assertEqual(5, stats.files["b.txt"]["deletions"]) + self.assertEqual("M", stats.files["b.txt"]["change_type"]) + + self.assertEqual(7, stats.files["c.txt"]["insertions"]) + self.assertEqual(0, stats.files["c.txt"]["deletions"]) + self.assertEqual("A", stats.files["c.txt"]["change_type"]) diff --git a/test/test_submodule.py b/test/test_submodule.py index 4a9c9c5..d88f9da 100644 --- a/test/test_submodule.py +++ b/test/test_submodule.py @@ -1,19 +1,20 @@ -# -*- coding: utf-8 -*- -# This module is part of GitPython and is released under -# the BSD License: 
https://opensource.org/license/bsd-3-clause/ +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ + import contextlib +import gc import os -import shutil -import tempfile +import os.path as osp from pathlib import Path +import shutil import sys -from unittest import mock, skipIf +import tempfile +from unittest import mock, skipUnless import pytest import git from git.cmd import Git -from git.compat import is_win from git.config import GitConfigParser, cp from git.exc import ( GitCommandError, @@ -25,11 +26,9 @@ from git.exc import ( from git.objects.submodule.base import Submodule from git.objects.submodule.root import RootModule, RootUpdateProgress from git.repo.fun import find_submodule_git_dir, touch -from test.lib import TestBase, with_rw_repo -from test.lib import with_rw_directory -from git.util import HIDE_WINDOWS_KNOWN_ERRORS -from git.util import to_native_path_linux, join_path_native -import os.path as osp +from git.util import HIDE_WINDOWS_KNOWN_ERRORS, join_path_native, to_native_path_linux + +from test.lib import TestBase, with_rw_directory, with_rw_repo @contextlib.contextmanager @@ -64,8 +63,6 @@ prog = TestRootProgress() class TestSubmodule(TestBase): def tearDown(self): - import gc - gc.collect() k_subm_current = "c15a6e1923a14bc760851913858a3942a4193cdb" @@ -74,48 +71,50 @@ class TestSubmodule(TestBase): def _do_base_tests(self, rwrepo): """Perform all tests in the given repository, it may be bare or nonbare""" - # manual instantiation + # Manual instantiation. smm = Submodule(rwrepo, "\0" * 20) - # name needs to be set in advance + # Name needs to be set in advance. self.assertRaises(AttributeError, getattr, smm, "name") - # iterate - 1 submodule + # Iterate - 1 submodule. sms = Submodule.list_items(rwrepo, self.k_subm_current) assert len(sms) == 1 sm = sms[0] - # at a different time, there is None + # At a different time, there is None. 
assert len(Submodule.list_items(rwrepo, self.k_no_subm_tag)) == 0 assert sm.path == "git/ext/gitdb" - assert sm.path != sm.name # in our case, we have ids there, which don't equal the path + assert sm.path != sm.name # In our case, we have ids there, which don't equal the path. assert sm.url.endswith("github.com/gitpython-developers/gitdb.git") assert sm.branch_path == "refs/heads/master" # the default ... assert sm.branch_name == "master" assert sm.parent_commit == rwrepo.head.commit - # size is always 0 + # Size is always 0. assert sm.size == 0 - # the module is not checked-out yet + # The module is not checked-out yet. self.assertRaises(InvalidGitRepositoryError, sm.module) - # which is why we can't get the branch either - it points into the module() repository + # ...which is why we can't get the branch either - it points into the module() + # repository. self.assertRaises(InvalidGitRepositoryError, getattr, sm, "branch") - # branch_path works, as its just a string + # branch_path works, as it's just a string. assert isinstance(sm.branch_path, str) - # some commits earlier we still have a submodule, but its at a different commit + # Some commits earlier we still have a submodule, but it's at a different + # commit. smold = next(Submodule.iter_items(rwrepo, self.k_subm_changed)) assert smold.binsha != sm.binsha assert smold != sm # the name changed - # force it to reread its information + # Force it to reread its information. del smold._url smold.url == sm.url # noqa: B015 # FIXME: Should this be an assertion? - # test config_reader/writer methods + # Test config_reader/writer methods. sm.config_reader() - new_smclone_path = None # keep custom paths for later + new_smclone_path = None # Keep custom paths for later. 
new_csmclone_path = None # if rwrepo.bare: with self.assertRaises(InvalidGitRepositoryError): @@ -123,7 +122,7 @@ class TestSubmodule(TestBase): pass else: with sm.config_writer() as writer: - # for faster checkout, set the url to the local path + # For faster checkout, set the url to the local path. new_smclone_path = Git.polish_url(osp.join(self.rorepo.working_tree_dir, sm.path)) writer.set_value("url", new_smclone_path) writer.release() @@ -132,76 +131,77 @@ class TestSubmodule(TestBase): # END handle bare repo smold.config_reader() - # cannot get a writer on historical submodules + # Cannot get a writer on historical submodules. if not rwrepo.bare: with self.assertRaises(ValueError): with smold.config_writer(): pass # END handle bare repo - # make the old into a new - this doesn't work as the name changed + # Make the old into a new - this doesn't work as the name changed. self.assertRaises(ValueError, smold.set_parent_commit, self.k_subm_current) # the sha is properly updated smold.set_parent_commit(self.k_subm_changed + "~1") assert smold.binsha != sm.binsha - # raises if the sm didn't exist in new parent - it keeps its - # parent_commit unchanged + # Raises if the sm didn't exist in new parent - it keeps its parent_commit + # unchanged. self.assertRaises(ValueError, smold.set_parent_commit, self.k_no_subm_tag) - # TEST TODO: if a path in the gitmodules file, but not in the index, it raises + # TODO: Test that, if a path is in the .gitmodules file, but not in the index, + # then it raises. # TEST UPDATE ############## - # module retrieval is not always possible + # Module retrieval is not always possible. if rwrepo.bare: self.assertRaises(InvalidGitRepositoryError, sm.module) self.assertRaises(InvalidGitRepositoryError, sm.remove) self.assertRaises(InvalidGitRepositoryError, sm.add, rwrepo, "here", "there") else: - # its not checked out in our case + # It's not checked out in our case. 
self.assertRaises(InvalidGitRepositoryError, sm.module) assert not sm.module_exists() - # currently there is only one submodule + # Currently there is only one submodule. assert len(list(rwrepo.iter_submodules())) == 1 assert sm.binsha != "\0" * 20 # TEST ADD ########### - # preliminary tests - # adding existing returns exactly the existing + # Preliminary tests. + # Adding existing returns exactly the existing. sma = Submodule.add(rwrepo, sm.name, sm.path) assert sma.path == sm.path - # no url and no module at path fails + # No url and no module at path fails. self.assertRaises(ValueError, Submodule.add, rwrepo, "newsubm", "pathtorepo", url=None) # CONTINUE UPDATE ################# - # lets update it - its a recursive one too + # Let's update it - it's a recursive one too. newdir = osp.join(sm.abspath, "dir") os.makedirs(newdir) - # update fails if the path already exists non-empty + # Update fails if the path already exists non-empty. self.assertRaises(OSError, sm.update) os.rmdir(newdir) - # dry-run does nothing + # Dry-run does nothing. sm.update(dry_run=True, progress=prog) assert not sm.module_exists() assert sm.update() is sm - sm_repopath = sm.path # cache for later + sm_repopath = sm.path # Cache for later. assert sm.module_exists() assert isinstance(sm.module(), git.Repo) assert sm.module().working_tree_dir == sm.abspath # INTERLEAVE ADD TEST ##################### - # url must match the one in the existing repository ( if submodule name suggests a new one ) - # or we raise + # url must match the one in the existing repository (if submodule name + # suggests a new one) or we raise. self.assertRaises( ValueError, Submodule.add, @@ -213,53 +213,55 @@ class TestSubmodule(TestBase): # CONTINUE UPDATE ################# - # we should have setup a tracking branch, which is also active + # We should have setup a tracking branch, which is also active. 
assert sm.module().head.ref.tracking_branch() is not None - # delete the whole directory and re-initialize + # Delete the whole directory and re-initialize. assert len(sm.children()) != 0 # shutil.rmtree(sm.abspath) sm.remove(force=True, configuration=False) assert len(sm.children()) == 0 - # dry-run does nothing + # Dry-run does nothing. sm.update(dry_run=True, recursive=False, progress=prog) assert len(sm.children()) == 0 sm.update(recursive=False) assert len(list(rwrepo.iter_submodules())) == 2 - assert len(sm.children()) == 1 # its not checked out yet + assert len(sm.children()) == 1 # It's not checked out yet. csm = sm.children()[0] assert not csm.module_exists() csm_repopath = csm.path - # adjust the path of the submodules module to point to the local destination + # Adjust the path of the submodules module to point to the local + # destination. new_csmclone_path = Git.polish_url(osp.join(self.rorepo.working_tree_dir, sm.path, csm.path)) with csm.config_writer() as writer: writer.set_value("url", new_csmclone_path) assert csm.url == new_csmclone_path - # dry-run does nothing + # Dry-run does nothing. assert not csm.module_exists() sm.update(recursive=True, dry_run=True, progress=prog) assert not csm.module_exists() - # update recursively again + # Update recursively again. sm.update(recursive=True) assert csm.module_exists() - # tracking branch once again + # Tracking branch once again. assert csm.module().head.ref.tracking_branch() is not None - # this flushed in a sub-submodule + # This flushed in a sub-submodule. assert len(list(rwrepo.iter_submodules())) == 2 - # reset both heads to the previous version, verify that to_latest_revision works + # Reset both heads to the previous version, verify that to_latest_revision + # works. smods = (sm.module(), csm.module()) for repo in smods: repo.head.reset("HEAD~2", working_tree=1) # END for each repo to reset - # dry run does nothing + # Dry-run does nothing. 
self.assertRaises( RepositoryDirtyError, sm.update, @@ -279,89 +281,89 @@ class TestSubmodule(TestBase): # END for each repo to check del smods - # if the head is detached, it still works ( but warns ) + # If the head is detached, it still works (but warns). smref = sm.module().head.ref sm.module().head.ref = "HEAD~1" - # if there is no tracking branch, we get a warning as well + # If there is no tracking branch, we get a warning as well. csm_tracking_branch = csm.module().head.ref.tracking_branch() csm.module().head.ref.set_tracking_branch(None) sm.update(recursive=True, to_latest_revision=True) # to_latest_revision changes the child submodule's commit, it needs an - # update now + # update now. csm.set_parent_commit(csm.repo.head.commit) - # undo the changes + # Undo the changes. sm.module().head.ref = smref csm.module().head.ref.set_tracking_branch(csm_tracking_branch) # REMOVAL OF REPOSITORY ####################### - # must delete something + # Must delete something. self.assertRaises(ValueError, csm.remove, module=False, configuration=False) - # module() is supposed to point to gitdb, which has a child-submodule whose URL is still pointing - # to GitHub. To save time, we will change it to + # module() is supposed to point to gitdb, which has a child-submodule whose + # URL is still pointing to GitHub. To save time, we will change it to: csm.set_parent_commit(csm.repo.head.commit) with csm.config_writer() as cw: cw.set_value("url", self._small_repo_url()) csm.repo.index.commit("adjusted URL to point to local source, instead of the internet") # We have modified the configuration, hence the index is dirty, and the - # deletion will fail - # NOTE: As we did a few updates in the meanwhile, the indices were reset - # Hence we create some changes + # deletion will fail. + # NOTE: As we did a few updates in the meanwhile, the indices were reset. + # Hence we create some changes. 
csm.set_parent_commit(csm.repo.head.commit) with sm.config_writer() as writer: writer.set_value("somekey", "somevalue") with csm.config_writer() as writer: writer.set_value("okey", "ovalue") self.assertRaises(InvalidGitRepositoryError, sm.remove) - # if we remove the dirty index, it would work + # If we remove the dirty index, it would work. sm.module().index.reset() - # still, we have the file modified + # Still, we have the file modified. self.assertRaises(InvalidGitRepositoryError, sm.remove, dry_run=True) sm.module().index.reset(working_tree=True) - # enforce the submodule to be checked out at the right spot as well. + # Enforce the submodule to be checked out at the right spot as well. csm.update() assert csm.module_exists() assert csm.exists() assert osp.isdir(csm.module().working_tree_dir) - # this would work + # This would work. assert sm.remove(force=True, dry_run=True) is sm assert sm.module_exists() sm.remove(force=True, dry_run=True) assert sm.module_exists() - # but ... we have untracked files in the child submodule + # But... we have untracked files in the child submodule. fn = join_path_native(csm.module().working_tree_dir, "newfile") with open(fn, "w") as fd: fd.write("hi") self.assertRaises(InvalidGitRepositoryError, sm.remove) - # forcibly delete the child repository + # Forcibly delete the child repository. prev_count = len(sm.children()) self.assertRaises(ValueError, csm.remove, force=True) # We removed sm, which removed all submodules. However, the instance we - # have still points to the commit prior to that, where it still existed + # have still points to the commit prior to that, where it still existed. csm.set_parent_commit(csm.repo.commit(), check=False) assert not csm.exists() assert not csm.module_exists() assert len(sm.children()) == prev_count - # now we have a changed index, as configuration was altered. - # fix this + # Now we have a changed index, as configuration was altered. + # Fix this. 
sm.module().index.reset(working_tree=True) - # now delete only the module of the main submodule + # Now delete only the module of the main submodule. assert sm.module_exists() sm.remove(configuration=False, force=True) assert sm.exists() assert not sm.module_exists() assert sm.config_reader().get_value("url") - # delete the rest + # Delete the rest. sm_path = sm.path sm.remove() assert not sm.exists() @@ -372,7 +374,7 @@ class TestSubmodule(TestBase): # ADD NEW SUBMODULE ################### - # add a simple remote repo - trailing slashes are no problem + # Add a simple remote repo - trailing slashes are no problem. smid = "newsub" osmid = "othersub" nsm = Submodule.add( @@ -386,11 +388,11 @@ class TestSubmodule(TestBase): assert nsm.name == smid assert nsm.module_exists() assert nsm.exists() - # its not checked out + # It's not checked out. assert not osp.isfile(join_path_native(nsm.module().working_tree_dir, Submodule.k_modules_file)) assert len(rwrepo.submodules) == 1 - # add another submodule, but into the root, not as submodule + # Add another submodule, but into the root, not as submodule. osm = Submodule.add(rwrepo, osmid, csm_repopath, new_csmclone_path, Submodule.k_head_default) assert osm != nsm assert osm.module_exists() @@ -399,28 +401,28 @@ class TestSubmodule(TestBase): assert len(rwrepo.submodules) == 2 - # commit the changes, just to finalize the operation + # Commit the changes, just to finalize the operation. rwrepo.index.commit("my submod commit") assert len(rwrepo.submodules) == 2 - # needs update as the head changed, it thinks its in the history - # of the repo otherwise + # Needs update, as the head changed. + # It thinks it's in the history of the repo otherwise. nsm.set_parent_commit(rwrepo.head.commit) osm.set_parent_commit(rwrepo.head.commit) # MOVE MODULE ############# - # invalid input + # Invalid input. 
self.assertRaises(ValueError, nsm.move, "doesntmatter", module=False, configuration=False) - # renaming to the same path does nothing + # Renaming to the same path does nothing. assert nsm.move(sm_path) is nsm - # rename a module - nmp = join_path_native("new", "module", "dir") + "/" # new module path + # Rename a module. + nmp = join_path_native("new", "module", "dir") + "/" # New module path. pmp = nsm.path assert nsm.move(nmp) is nsm - nmp = nmp[:-1] # cut last / + nmp = nmp[:-1] # Cut last / nmpl = to_native_path_linux(nmp) assert nsm.path == nmpl assert rwrepo.submodules[0].path == nmpl @@ -431,14 +433,15 @@ class TestSubmodule(TestBase): self.assertRaises(ValueError, nsm.move, mpath) os.remove(absmpath) - # now it works, as we just move it back + # Now it works, as we just move it back. nsm.move(pmp) assert nsm.path == pmp assert rwrepo.submodules[0].path == pmp # REMOVE 'EM ALL ################ - # if a submodule's repo has no remotes, it can't be added without an explicit url + # If a submodule's repo has no remotes, it can't be added without an + # explicit url. osmod = osm.module() osm.remove(module=False) @@ -448,7 +451,7 @@ class TestSubmodule(TestBase): self.assertRaises(ValueError, Submodule.add, rwrepo, osmid, csm_repopath, url=None) # END handle bare mode - # Error if there is no submodule file here + # Error if there is no submodule file here. self.assertRaises( IOError, Submodule._config_parser, @@ -457,10 +460,13 @@ class TestSubmodule(TestBase): True, ) - # @skipIf(HIDE_WINDOWS_KNOWN_ERRORS, ## ACTUALLY skipped by `git.submodule.base#L869`. + # ACTUALLY skipped by git.util.rmtree (in local onerror function), called via + # git.objects.submodule.base.Submodule.remove at "method(mp)", line 1011. 
+ # + # @skipIf(HIDE_WINDOWS_KNOWN_ERRORS, # "FIXME: fails with: PermissionError: [WinError 32] The process cannot access the file because" # "it is being used by another process: " - # "'C:\\Users\\ankostis\\AppData\\Local\\Temp\\tmp95c3z83bnon_bare_test_base_rw\\git\\ext\\gitdb\\gitdb\\ext\\smmap'") # noqa E501 + # "'C:\\Users\\ankostis\\AppData\\Local\\Temp\\tmp95c3z83bnon_bare_test_base_rw\\git\\ext\\gitdb\\gitdb\\ext\\smmap'") # noqa: E501 @with_rw_repo(k_subm_current) def test_base_rw(self, rwrepo): self._do_base_tests(rwrepo) @@ -474,22 +480,21 @@ class TestSubmodule(TestBase): reason="Cygwin GitPython can't find submodule SHA", raises=ValueError, ) - @skipIf( + @pytest.mark.xfail( HIDE_WINDOWS_KNOWN_ERRORS, - """ - File "C:\\projects\\gitpython\\git\\cmd.py", line 559, in execute - raise GitCommandNotFound(command, err) - git.exc.GitCommandNotFound: Cmd('git') not found due to: OSError('[WinError 6] The handle is invalid') - cmdline: git clone -n --shared -v C:\\projects\\gitpython\\.git Users\\appveyor\\AppData\\Local\\Temp\\1\\tmplyp6kr_rnon_bare_test_root_module - """, # noqa E501 + reason=( + '"The process cannot access the file because it is being used by another process"' + + " on first call to rm.update" + ), + raises=PermissionError, ) @with_rw_repo(k_subm_current, bare=False) def test_root_module(self, rwrepo): - # Can query everything without problems + # Can query everything without problems. rm = RootModule(self.rorepo) assert rm.module() is self.rorepo - # try attributes + # Try attributes. rm.binsha rm.mode rm.path @@ -503,110 +508,117 @@ class TestSubmodule(TestBase): with rm.config_writer(): pass - # deep traversal gitdb / async + # Deep traversal gitdb / async. rsmsp = [sm.path for sm in rm.traverse()] - assert len(rsmsp) >= 2 # gitdb and async [and smmap], async being a child of gitdb + assert len(rsmsp) >= 2 # gitdb and async [and smmap], async being a child of gitdb. 
- # cannot set the parent commit as root module's path didn't exist + # Cannot set the parent commit as root module's path didn't exist. self.assertRaises(ValueError, rm.set_parent_commit, "HEAD") # TEST UPDATE ############# - # setup commit which remove existing, add new and modify existing submodules + # Set up a commit that removes existing, adds new and modifies existing + # submodules. rm = RootModule(rwrepo) assert len(rm.children()) == 1 - # modify path without modifying the index entry - # ( which is what the move method would do properly ) + # Modify path without modifying the index entry. + # (Which is what the move method would do properly.) # ================================================== sm = rm.children()[0] pp = "path/prefix" fp = join_path_native(pp, sm.path) prep = sm.path - assert not sm.module_exists() # was never updated after rwrepo's clone + assert not sm.module_exists() # It was never updated after rwrepo's clone. - # assure we clone from a local source + # Ensure we clone from a local source. with sm.config_writer() as writer: writer.set_value("url", Git.polish_url(osp.join(self.rorepo.working_tree_dir, sm.path))) - # dry-run does nothing + # Dry-run does nothing. sm.update(recursive=False, dry_run=True, progress=prog) assert not sm.module_exists() sm.update(recursive=False) assert sm.module_exists() with sm.config_writer() as writer: - writer.set_value("path", fp) # change path to something with prefix AFTER url change + # Change path to something with prefix AFTER url change. + writer.set_value("path", fp) - # update doesn't fail, because list_items ignores the wrong path in such situations. + # Update doesn't fail, because list_items ignores the wrong path in such + # situations. 
rm.update(recursive=False) - # move it properly - doesn't work as it its path currently points to an indexentry - # which doesn't exist ( move it to some path, it doesn't matter here ) + # Move it properly - doesn't work as it its path currently points to an + # indexentry which doesn't exist (move it to some path, it doesn't matter here). self.assertRaises(InvalidGitRepositoryError, sm.move, pp) - # reset the path(cache) to where it was, now it works + # Reset the path(cache) to where it was, now it works. sm.path = prep - sm.move(fp, module=False) # leave it at the old location + sm.move(fp, module=False) # Leave it at the old location. assert not sm.module_exists() - cpathchange = rwrepo.index.commit("changed sm path") # finally we can commit + cpathchange = rwrepo.index.commit("changed sm path") # Finally we can commit. - # update puts the module into place + # Update puts the module into place. rm.update(recursive=False, progress=prog) sm.set_parent_commit(cpathchange) assert sm.module_exists() - # add submodule - # ================ + # Add submodule. + # ============== nsmn = "newsubmodule" nsmp = "submrepo" subrepo_url = Git.polish_url(osp.join(self.rorepo.working_tree_dir, rsmsp[0], rsmsp[1])) nsm = Submodule.add(rwrepo, nsmn, nsmp, url=subrepo_url) - csmadded = rwrepo.index.commit("Added submodule").hexsha # make sure we don't keep the repo reference + csmadded = rwrepo.index.commit("Added submodule").hexsha # Make sure we don't keep the repo reference. nsm.set_parent_commit(csmadded) assert nsm.module_exists() - # in our case, the module should not exist, which happens if we update a parent - # repo and a new submodule comes into life + # In our case, the module should not exist, which happens if we update a parent + # repo and a new submodule comes into life. nsm.remove(configuration=False, module=True) assert not nsm.module_exists() and nsm.exists() - # dry-run does nothing + # Dry-run does nothing. 
rm.update(recursive=False, dry_run=True, progress=prog) - # otherwise it will work + # Otherwise it will work. rm.update(recursive=False, progress=prog) assert nsm.module_exists() - # remove submodule - the previous one + # Remove submodule - the previous one. # ==================================== sm.set_parent_commit(csmadded) smp = sm.abspath assert not sm.remove(module=False).exists() - assert osp.isdir(smp) # module still exists + assert osp.isdir(smp) # Module still exists. csmremoved = rwrepo.index.commit("Removed submodule") - # an update will remove the module - # not in dry_run + # An update will remove the module. + # Not in dry_run. rm.update(recursive=False, dry_run=True, force_remove=True) assert osp.isdir(smp) - # when removing submodules, we may get new commits as nested submodules are auto-committing changes - # to allow deletions without force, as the index would be dirty otherwise. + # When removing submodules, we may get new commits as nested submodules are + # auto-committing changes to allow deletions without force, as the index would + # be dirty otherwise. # QUESTION: Why does this seem to work in test_git_submodule_compatibility() ? self.assertRaises(InvalidGitRepositoryError, rm.update, recursive=False, force_remove=False) rm.update(recursive=False, force_remove=True) assert not osp.isdir(smp) - # 'apply work' to the nested submodule and assure this is not removed/altered during updates - # Need to commit first, otherwise submodule.update wouldn't have a reason to change the head + # 'Apply work' to the nested submodule and ensure this is not removed/altered + # during updates. We need to commit first, otherwise submodule.update wouldn't + # have a reason to change the head. touch(osp.join(nsm.module().working_tree_dir, "new-file")) - # We cannot expect is_dirty to even run as we wouldn't reset a head to the same location + # We cannot expect is_dirty to even run as we wouldn't reset a head to the same + # location. 
assert nsm.module().head.commit.hexsha == nsm.hexsha nsm.module().index.add([nsm]) nsm.module().index.commit("added new file") - rm.update(recursive=False, dry_run=True, progress=prog) # would not change head, and thus doesn't fail - # Everything we can do from now on will trigger the 'future' check, so no is_dirty() check will even run - # This would only run if our local branch is in the past and we have uncommitted changes + rm.update(recursive=False, dry_run=True, progress=prog) # Would not change head, and thus doesn't fail. + # Everything we can do from now on will trigger the 'future' check, so no + # is_dirty() check will even run. This would only run if our local branch is in + # the past and we have uncommitted changes. prev_commit = nsm.module().head.commit rm.update(recursive=False, dry_run=False, progress=prog) @@ -616,10 +628,10 @@ class TestSubmodule(TestBase): rm.update(recursive=True, progress=prog, force_reset=True) assert prev_commit != nsm.module().head.commit, "head changed, as the remote url and its commit changed" - # change url ... - # =============== - # ... to the first repository, this way we have a fast checkout, and a completely different - # repository at the different url + # Change url... + # ============= + # ...to the first repository. This way we have a fast checkout, and a completely + # different repository at the different url. nsm.set_parent_commit(csmremoved) nsmurl = Git.polish_url(osp.join(self.rorepo.working_tree_dir, rsmsp[0])) with nsm.config_writer() as writer: @@ -627,7 +639,7 @@ class TestSubmodule(TestBase): csmpathchange = rwrepo.index.commit("changed url") nsm.set_parent_commit(csmpathchange) - # Now nsm head is in the future of the tracked remote branch + # Now nsm head is in the future of the tracked remote branch. 
prev_commit = nsm.module().head.commit # dry-run does nothing rm.update(recursive=False, dry_run=True, progress=prog) @@ -639,16 +651,15 @@ class TestSubmodule(TestBase): assert len(rwrepo.submodules) == 1 assert not rwrepo.submodules[0].children()[0].module_exists(), "nested submodule should not be checked out" - # add the submodule's changed commit to the index, which is what the - # user would do - # beforehand, update our instance's binsha with the new one + # Add the submodule's changed commit to the index, which is what the user would + # do. Beforehand, update our instance's binsha with the new one. nsm.binsha = nsm.module().head.commit.binsha rwrepo.index.add([nsm]) - # change branch - # ================= - # we only have one branch, so we switch to a virtual one, and back - # to the current one to trigger the difference + # Change branch. + # ============== + # We only have one branch, so we switch to a virtual one, and back to the + # current one to trigger the difference. cur_branch = nsm.branch nsmm = nsm.module() prev_commit = nsmm.head.commit @@ -659,37 +670,49 @@ class TestSubmodule(TestBase): nsm.set_parent_commit(csmbranchchange) # END for each branch to change - # Lets remove our tracking branch to simulate some changes + # Let's remove our tracking branch to simulate some changes. nsmmh = nsmm.head - assert nsmmh.ref.tracking_branch() is None # never set it up until now + assert nsmmh.ref.tracking_branch() is None # Never set it up until now. assert not nsmmh.is_detached - # dry run does nothing + # Dry-run does nothing. rm.update(recursive=False, dry_run=True, progress=prog) assert nsmmh.ref.tracking_branch() is None - # the real thing does + # The real thing does. rm.update(recursive=False, progress=prog) assert nsmmh.ref.tracking_branch() is not None assert not nsmmh.is_detached - # recursive update + # Recursive update. 
# ================= - # finally we recursively update a module, just to run the code at least once - # remove the module so that it has more work - assert len(nsm.children()) >= 1 # could include smmap + # Finally we recursively update a module, just to run the code at least once + # remove the module so that it has more work. + assert len(nsm.children()) >= 1 # Could include smmap. assert nsm.exists() and nsm.module_exists() and len(nsm.children()) >= 1 - # assure we pull locally only + # Ensure we pull locally only. nsmc = nsm.children()[0] with nsmc.config_writer() as writer: writer.set_value("url", subrepo_url) - rm.update(recursive=True, progress=prog, dry_run=True) # just to run the code + rm.update(recursive=True, progress=prog, dry_run=True) # Just to run the code. rm.update(recursive=True, progress=prog) - # gitdb: has either 1 or 2 submodules depending on the version + # gitdb: has either 1 or 2 submodules depending on the version. assert len(nsm.children()) >= 1 and nsmc.module_exists() + def test_iter_items_from_nonexistent_hash(self): + it = Submodule.iter_items(self.rorepo, "b4ecbfaa90c8be6ed6d9fb4e57cc824663ae15b4") + with self.assertRaisesRegex(ValueError, r"\bcould not be resolved\b"): + next(it) + + def test_iter_items_from_invalid_hash(self): + """Check legacy behavaior on BadName (also applies to IOError, i.e. 
OSError).""" + it = Submodule.iter_items(self.rorepo, "xyz") + with self.assertRaises(StopIteration) as ctx: + next(it) + self.assertIsNone(ctx.exception.value) + @with_rw_repo(k_no_subm_tag, bare=False) def test_first_submodule(self, rwrepo): assert len(list(rwrepo.iter_submodules())) == 0 @@ -701,7 +724,7 @@ class TestSubmodule(TestBase): sm = rwrepo.create_submodule(sm_name, sm_path, rwrepo.git_dir, no_checkout=True) assert sm.exists() and sm.module_exists() rwrepo.index.commit("Added submodule " + sm_name) - # end for each submodule path to add + # END for each submodule path to add self.assertRaises(ValueError, rwrepo.create_submodule, "fail", osp.expanduser("~")) self.assertRaises( @@ -728,7 +751,7 @@ class TestSubmodule(TestBase): url=empty_repo_dir, no_checkout=checkout_mode and True or False, ) - # end for each checkout mode + # END for each checkout mode @with_rw_directory @_patch_git_config("protocol.file.allow", "always") @@ -740,7 +763,7 @@ class TestSubmodule(TestBase): assert len(repo.submodules) == 1 - # Delete the directory from submodule + # Delete the directory from submodule. 
submodule_path = osp.join(repo_path, "module") shutil.rmtree(submodule_path) repo.git.add([submodule_path]) @@ -749,15 +772,13 @@ class TestSubmodule(TestBase): repo = git.Repo(repo_path) assert len(repo.submodules) == 0 - @skipIf( + @pytest.mark.xfail( HIDE_WINDOWS_KNOWN_ERRORS, - """FIXME on cygwin: File "C:\\projects\\gitpython\\git\\cmd.py", line 671, in execute - raise GitCommandError(command, status, stderr_value, stdout_value) - GitCommandError: Cmd('git') failed due to: exit code(128) - cmdline: git add 1__Xava verbXXten 1_test _myfile 1_test_other_file 1_XXava-----verbXXten - stderr: 'fatal: pathspec '"1__çava verböten"' did not match any files' - FIXME on appveyor: see https://ci.appveyor.com/project/Byron/gitpython/build/1.0.185 - """, + reason=( + '"The process cannot access the file because it is being used by another process"' + + " on first call to sm.move" + ), + raises=PermissionError, ) @with_rw_directory @_patch_git_config("protocol.file.allow", "always") @@ -782,8 +803,8 @@ class TestSubmodule(TestBase): for init in (False, True): sm.update(init=init) sm2.update(init=init) - # end for each init state - # end for each iteration + # END for each init state + # END for each iteration sm.move(sm.path + "_moved") sm2.move(sm2.path + "_moved") @@ -800,13 +821,13 @@ class TestSubmodule(TestBase): smm.git.add(Git.polish_url(fp)) smm.git.commit(m="new file added") - # submodules are retrieved from the current commit's tree, therefore we can't really get a new submodule - # object pointing to the new submodule commit + # Submodules are retrieved from the current commit's tree, therefore we can't + # really get a new submodule object pointing to the new submodule commit. 
sm_too = parent.submodules["module_moved"] assert parent.head.commit.tree[sm.path].binsha == sm.binsha assert sm_too.binsha == sm.binsha, "cached submodule should point to the same commit as updated one" - added_bies = parent.index.add([sm]) # added base-index-entries + added_bies = parent.index.add([sm]) # Added base-index-entries. assert len(added_bies) == 1 parent.index.commit("add same submodule entry") commit_sm = parent.head.commit.tree[sm.path] @@ -822,9 +843,11 @@ class TestSubmodule(TestBase): assert commit_sm.binsha == sm_too.binsha assert sm_too.binsha != sm.binsha - # @skipIf(HIDE_WINDOWS_KNOWN_ERRORS, ## ACTUALLY skipped by `git.submodule.base#L869`. - # "FIXME: helper.wrapper fails with: PermissionError: [WinError 5] Access is denied: " - # "'C:\\Users\\appveyor\\AppData\\Local\\Temp\\1\\test_work_tree_unsupportedryfa60di\\master_repo\\.git\\objects\\pack\\pack-bc9e0787aef9f69e1591ef38ea0a6f566ec66fe3.idx") # noqa E501 + @pytest.mark.xfail( + HIDE_WINDOWS_KNOWN_ERRORS, + reason='"The process cannot access the file because it is being used by another process" on call to sm.move', + raises=PermissionError, + ) @with_rw_directory def test_git_submodule_compatibility(self, rwdir): parent = git.Repo.init(osp.join(rwdir, "parent")) @@ -836,10 +859,11 @@ class TestSubmodule(TestBase): assert sm.exists() == value assert sm.module_exists() == value - # end + # END assert_exists - # As git is backwards compatible itself, it would still recognize what we do here ... unless we really - # muss it up. That's the only reason why the test is still here ... . + # As git is backwards compatible itself, it would still recognize what we do + # here... unless we really muss it up. That's the only reason why the test is + # still here... 
assert len(parent.git.submodule().splitlines()) == 1 module_repo_path = osp.join(sm.module().working_tree_dir, ".git") @@ -851,14 +875,14 @@ class TestSubmodule(TestBase): assert osp.isfile(module_repo_path) assert sm.module().has_separate_working_tree() assert find_submodule_git_dir(module_repo_path) is not None, "module pointed to by .git file must be valid" - # end verify submodule 'style' + # END verify submodule 'style' - # test move + # Test move. new_sm_path = join_path_native("submodules", "one") sm.move(new_sm_path) assert_exists(sm) - # Add additional submodule level + # Add additional submodule level. csm = sm.module().create_submodule( "nested-submodule", join_path_native("nested-submodule", "working-tree"), @@ -868,21 +892,22 @@ class TestSubmodule(TestBase): sm_head_commit = sm.module().commit() assert_exists(csm) - # Fails because there are new commits, compared to the remote we cloned from + # Fails because there are new commits, compared to the remote we cloned from. self.assertRaises(InvalidGitRepositoryError, sm.remove, dry_run=True) assert_exists(sm) assert sm.module().commit() == sm_head_commit assert_exists(csm) - # rename nested submodule - # This name would move itself one level deeper - needs special handling internally + # Rename nested submodule. + # This name would move itself one level deeper - needs special handling + # internally. new_name = csm.name + "/mine" assert csm.rename(new_name).name == new_name assert_exists(csm) assert csm.repo.is_dirty(index=True, working_tree=False), "index must contain changed .gitmodules file" csm.repo.index.commit("renamed module") - # keep_going evaluation + # keep_going evaluation. 
rsm = parent.submodule_update() assert_exists(sm) assert_exists(csm) @@ -908,7 +933,7 @@ class TestSubmodule(TestBase): sm.remove(dry_run=dry_run, force=True) assert_exists(sm, value=dry_run) assert osp.isdir(sm_module_path) == dry_run - # end for each dry-run mode + # END for each dry-run mode @with_rw_directory def test_ignore_non_submodule_file(self, rwdir): @@ -942,11 +967,11 @@ class TestSubmodule(TestBase): parent.index.commit("Added submodule") assert sm.repo is parent # yoh was surprised since expected sm repo!! - # so created a new instance for submodule + # So created a new instance for submodule. smrepo = git.Repo(osp.join(rwdir, "parent", sm.path)) - # Adding a remote without fetching so would have no references + # Adding a remote without fetching so would have no references. smrepo.create_remote("special", "git@server-shouldnotmatter:repo.git") - # And we should be able to remove it just fine + # And we should be able to remove it just fine. sm.remove() assert not sm.exists() @@ -960,6 +985,12 @@ class TestSubmodule(TestBase): assert sm.rename(sm_name) is sm and sm.name == sm_name assert not sm.repo.is_dirty(index=True, working_tree=False, untracked_files=False) + # This is needed to work around a PermissionError on Windows, resembling others, + # except new in Python 3.12. (*Maybe* this could be due to changes in CPython's + # garbage collector detailed in https://github.com/python/cpython/issues/97922.) 
+ if sys.platform == "win32" and sys.version_info >= (3, 12): + gc.collect() + new_path = "renamed/myname" assert sm.move(new_path).name == new_path @@ -971,12 +1002,11 @@ class TestSubmodule(TestBase): sm_mod = sm.module() if osp.isfile(osp.join(sm_mod.working_tree_dir, ".git")) == sm._need_gitfile_submodules(parent.git): assert sm_mod.git_dir.endswith(join_path_native(".git", "modules", new_sm_name)) - # end @with_rw_directory def test_branch_renames(self, rw_dir): - # Setup initial sandbox: - # parent repo has one submodule, which has all the latest changes + # Set up initial sandbox: + # The parent repo has one submodule, which has all the latest changes. source_url = self._small_repo_url() sm_source_repo = git.Repo.clone_from(source_url, osp.join(rw_dir, "sm-source"), b="master") parent_repo = git.Repo.init(osp.join(rw_dir, "parent")) @@ -989,20 +1019,22 @@ class TestSubmodule(TestBase): parent_repo.index.commit("added submodule") assert sm.exists() - # Create feature branch with one new commit in submodule source + # Create feature branch with one new commit in submodule source. sm_fb = sm_source_repo.create_head("feature") sm_fb.checkout() new_file = touch(osp.join(sm_source_repo.working_tree_dir, "new-file")) sm_source_repo.index.add([new_file]) sm.repo.index.commit("added new file") - # change designated submodule checkout branch to the new upstream feature branch + # Change designated submodule checkout branch to the new upstream feature + # branch. with sm.config_writer() as smcw: smcw.set_value("branch", sm_fb.name) assert sm.repo.is_dirty(index=True, working_tree=False) sm.repo.index.commit("changed submodule branch to '%s'" % sm_fb) - # verify submodule update with feature branch that leaves currently checked out branch in it's past + # Verify submodule update with feature branch that leaves currently checked out + # branch in it's past. 
sm_mod = sm.module() prev_commit = sm_mod.commit() assert sm_mod.head.ref.name == "master" @@ -1014,34 +1046,35 @@ class TestSubmodule(TestBase): assert sm_mod.head.ref.name == sm_fb.name assert sm_mod.commit() == sm_fb.commit - # Create new branch which is in our past, and thus seemingly unrelated to the currently checked out one - # To make it even 'harder', we shall fork and create a new commit + # Create new branch which is in our past, and thus seemingly unrelated to the + # currently checked out one. + # To make it even 'harder', we shall fork and create a new commit. sm_pfb = sm_source_repo.create_head("past-feature", commit="HEAD~20") sm_pfb.checkout() sm_source_repo.index.add([touch(osp.join(sm_source_repo.working_tree_dir, "new-file"))]) sm_source_repo.index.commit("new file added, to past of '%r'" % sm_fb) - # Change designated submodule checkout branch to a new commit in its own past + # Change designated submodule checkout branch to a new commit in its own past. with sm.config_writer() as smcw: smcw.set_value("branch", sm_pfb.path) sm.repo.index.commit("changed submodule branch to '%s'" % sm_pfb) - # Test submodule updates - must fail if submodule is dirty + # Test submodule updates - must fail if submodule is dirty. touch(osp.join(sm_mod.working_tree_dir, "unstaged file")) - # This doesn't fail as our own submodule binsha didn't change, and the reset is only triggered if - # to latest revision is True. + # This doesn't fail as our own submodule binsha didn't change, and the reset is + # only triggered if to_latest_revision is True. 
parent_repo.submodule_update(to_latest_revision=False) - sm_mod.head.ref.name == sm_pfb.name, "should have been switched to past head" - sm_mod.commit() == sm_fb.commit, "Head wasn't reset" + assert sm_mod.head.ref.name == sm_pfb.name, "should have been switched to past head" + assert sm_mod.commit() == sm_fb.commit, "Head wasn't reset" self.assertRaises(RepositoryDirtyError, parent_repo.submodule_update, to_latest_revision=True) parent_repo.submodule_update(to_latest_revision=True, force_reset=True) assert sm_mod.commit() == sm_pfb.commit, "Now head should have been reset" assert sm_mod.head.ref.name == sm_pfb.name - @skipIf(not is_win, "Specifically for Windows.") + @skipUnless(sys.platform == "win32", "Specifically for Windows.") def test_to_relative_path_with_super_at_root_drive(self): - class Repo(object): + class Repo: working_tree_dir = "D:\\" super_repo = Repo() @@ -1050,9 +1083,9 @@ class TestSubmodule(TestBase): msg = '_to_relative_path should be "submodule_path" but was "%s"' % relative_path assert relative_path == "submodule_path", msg - @skipIf( - True, - "for some unknown reason the assertion fails, even though it in fact is working in more common setup", + @pytest.mark.xfail( + reason="for some unknown reason the assertion fails, even though it in fact is working in more common setup", + raises=AssertionError, ) @with_rw_directory def test_depth(self, rwdir): @@ -1169,8 +1202,8 @@ class TestSubmodule(TestBase): "fd::/foo", ] for url in urls: - # The URL will be allowed into the command, but the command will - # fail since we don't have that protocol enabled in the Git config file. + # The URL will be allowed into the command, but the command will fail + # since we don't have that protocol enabled in the Git config file. 
with self.assertRaises(GitCommandError): Submodule.add(rw_repo, "new", "new", url, allow_unsafe_protocols=True) assert not tmp_file.exists() @@ -1254,8 +1287,8 @@ class TestSubmodule(TestBase): ] for url in urls: submodule = Submodule(rw_repo, b"\0" * 20, name="new", path="new", url=url) - # The URL will be allowed into the command, but the command will - # fail since we don't have that protocol enabled in the Git config file. + # The URL will be allowed into the command, but the command will fail + # since we don't have that protocol enabled in the Git config file. with self.assertRaises(GitCommandError): submodule.update(allow_unsafe_protocols=True) assert not tmp_file.exists() diff --git a/test/test_tree.py b/test/test_tree.py index e597056..7315811 100644 --- a/test/test_tree.py +++ b/test/test_tree.py @@ -1,37 +1,29 @@ -# test_tree.py # Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors # -# This module is part of GitPython and is released under -# the BSD License: https://opensource.org/license/bsd-3-clause/ +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ from io import BytesIO -from unittest import skipIf +import os.path as osp +from pathlib import Path +import subprocess -from git.objects import Tree, Blob -from test.lib import TestBase -from git.util import HIDE_WINDOWS_KNOWN_ERRORS +from git.objects import Blob, Tree +from git.util import cwd -import os.path as osp +from test.lib import TestBase, with_rw_directory class TestTree(TestBase): - @skipIf( - HIDE_WINDOWS_KNOWN_ERRORS, - """ - File "C:\\projects\\gitpython\\git\\cmd.py", line 559, in execute - raise GitCommandNotFound(command, err) - git.exc.GitCommandNotFound: Cmd('git') not found due to: OSError('[WinError 6] The handle is invalid') - cmdline: git cat-file --batch-check""", - ) def test_serializable(self): - # tree at the given commit contains a submodule as well + # Tree at the given 
commit contains a submodule as well. roottree = self.rorepo.tree("6c1faef799095f3990e9970bc2cb10aa0221cf9c") for item in roottree.traverse(ignore_self=False): if item.type != Tree.type: continue # END skip non-trees tree = item - # trees have no dict + # Trees have no dict. self.assertRaises(AttributeError, setattr, tree, "someattr", 1) orig_data = tree.data_stream.read() @@ -46,19 +38,75 @@ class TestTree(TestBase): testtree._deserialize(stream) assert testtree._cache == orig_cache - # replaces cache, but we make sure of it + # Replaces cache, but we make sure of it. del testtree._cache testtree._deserialize(stream) # END for each item in tree - @skipIf( - HIDE_WINDOWS_KNOWN_ERRORS, - """ - File "C:\\projects\\gitpython\\git\\cmd.py", line 559, in execute - raise GitCommandNotFound(command, err) - git.exc.GitCommandNotFound: Cmd('git') not found due to: OSError('[WinError 6] The handle is invalid') - cmdline: git cat-file --batch-check""", - ) + @with_rw_directory + def _get_git_ordered_files(self, rw_dir): + """Get files as git orders them, to compare in test_tree_modifier_ordering.""" + # Create directory contents. + Path(rw_dir, "file").mkdir() + for filename in ( + "bin", + "bin.d", + "file.to", + "file.toml", + "file.toml.bin", + "file0", + ): + Path(rw_dir, filename).touch() + Path(rw_dir, "file", "a").touch() + + with cwd(rw_dir): + # Prepare the repository. + subprocess.run(["git", "init", "-q"], check=True) + subprocess.run(["git", "add", "."], check=True) + subprocess.run(["git", "commit", "-m", "c1"], check=True) + + # Get git output from which an ordered file list can be parsed. 
+ rev_parse_command = ["git", "rev-parse", "HEAD^{tree}"] + tree_hash = subprocess.check_output(rev_parse_command).decode().strip() + cat_file_command = ["git", "cat-file", "-p", tree_hash] + cat_file_output = subprocess.check_output(cat_file_command).decode() + + return [line.split()[-1] for line in cat_file_output.split("\n") if line] + + def test_tree_modifier_ordering(self): + """TreeModifier.set_done() sorts files in the same order git does.""" + git_file_names_in_order = self._get_git_ordered_files() + + hexsha = "6c1faef799095f3990e9970bc2cb10aa0221cf9c" + roottree = self.rorepo.tree(hexsha) + blob_mode = Tree.blob_id << 12 + tree_mode = Tree.tree_id << 12 + + files_in_desired_order = [ + (blob_mode, "bin"), + (blob_mode, "bin.d"), + (blob_mode, "file.to"), + (blob_mode, "file.toml"), + (blob_mode, "file.toml.bin"), + (blob_mode, "file0"), + (tree_mode, "file"), + ] + mod = roottree.cache + for file_mode, file_name in files_in_desired_order: + mod.add(hexsha, file_mode, file_name) + # end for each file + + def file_names_in_order(): + return [t[1] for t in files_in_desired_order] + + def names_in_mod_cache(): + a = [t[2] for t in mod._cache] + here = file_names_in_order() + return [e for e in a if e in here] + + mod.set_done() + assert names_in_mod_cache() == git_file_names_in_order, "set_done() performs git-sorting" + def test_traverse(self): root = self.rorepo.tree("0.1.6") num_recursive = 0 @@ -72,29 +120,29 @@ class TestTree(TestBase): # END for each object assert all_items == root.list_traverse() - # limit recursion level to 0 - should be same as default iteration + # Limit recursion level to 0 - should be same as default iteration. assert all_items assert "CHANGES" in root assert len(list(root)) == len(list(root.traverse(depth=1))) - # only choose trees + # Only choose trees. 
trees_only = lambda i, d: i.type == "tree" trees = list(root.traverse(predicate=trees_only)) assert len(trees) == len([i for i in root.traverse() if trees_only(i, 0)]) - # test prune + # Test prune. lib_folder = lambda t, d: t.path == "lib" pruned_trees = list(root.traverse(predicate=trees_only, prune=lib_folder)) assert len(pruned_trees) < len(trees) - # trees and blobs + # Trees and blobs. assert len(set(trees) | set(root.trees)) == len(trees) assert len({b for b in root if isinstance(b, Blob)} | set(root.blobs)) == len(root.blobs) subitem = trees[0][0] assert "/" in subitem.path assert subitem.name == osp.basename(subitem.path) - # assure that at some point the traversed paths have a slash in them + # Check that at some point the traversed paths have a slash in them. found_slash = False for item in root.traverse(): assert osp.isabs(item.abspath) @@ -102,8 +150,8 @@ class TestTree(TestBase): found_slash = True # END check for slash - # slashes in paths are supported as well - # NOTE: on py3, / doesn't work with strings anymore ... + # Slashes in paths are supported as well. + # NOTE: On Python 3, / doesn't work with strings anymore... 
assert root[item.path] == item == root / item.path # END for each item assert found_slash diff --git a/test/test_util.py b/test/test_util.py index 42edc57..dad2f3d 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -1,188 +1,428 @@ -# test_utils.py # Copyright (C) 2008, 2009 Michael Trier (mtrier@gmail.com) and contributors # -# This module is part of GitPython and is released under -# the BSD License: https://opensource.org/license/bsd-3-clause/ +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ +import ast +from datetime import datetime import os +import pathlib import pickle +import stat +import subprocess import sys import tempfile import time -from unittest import mock, skipIf -from datetime import datetime +from unittest import SkipTest, mock -import pytest import ddt +import pytest from git.cmd import dashify -from git.compat import is_win from git.objects.util import ( altz_to_utctz_str, - utctz_to_altz, - verify_utctz, + from_timestamp, parse_date, tzoffset, - from_timestamp, -) -from test.lib import ( - TestBase, - with_rw_repo, + utctz_to_altz, + verify_utctz, ) from git.util import ( - LockFile, - BlockingLockFile, - get_user_id, Actor, + BlockingLockFile, IterableList, + LockFile, cygpath, decygpath, + get_user_id, remove_password_if_present, + rmtree, ) +from test.lib import TestBase, with_rw_repo + + +@pytest.fixture +def permission_error_tmpdir(tmp_path): + """Fixture to test permissions errors in situations where they are not overcome.""" + td = tmp_path / "testdir" + td.mkdir() + (td / "x").touch() + + # Set up PermissionError on Windows, where we can't delete read-only files. + (td / "x").chmod(stat.S_IRUSR) + + # Set up PermissionError on Unix, where non-root users can't delete files in + # read-only directories. (Tests that rely on this and assert that rmtree raises + # PermissionError will fail if they are run as root.) 
+ td.chmod(stat.S_IRUSR | stat.S_IXUSR) + + yield td + + +class TestRmtree: + """Tests for :func:`git.util.rmtree`.""" + + def test_deletes_nested_dir_with_files(self, tmp_path): + td = tmp_path / "testdir" + + for d in td, td / "q", td / "s": + d.mkdir() + for f in ( + td / "p", + td / "q" / "w", + td / "q" / "x", + td / "r", + td / "s" / "y", + td / "s" / "z", + ): + f.touch() + + try: + rmtree(td) + except SkipTest as ex: + pytest.fail(f"rmtree unexpectedly attempts skip: {ex!r}") + + assert not td.exists() + + @pytest.mark.skipif( + sys.platform == "cygwin", + reason="Cygwin can't set the permissions that make the test meaningful.", + ) + def test_deletes_dir_with_readonly_files(self, tmp_path): + # Automatically works on Unix, but requires special handling on Windows. + # Not to be confused with what permission_error_tmpdir sets up (see below). + + td = tmp_path / "testdir" + + for d in td, td / "sub": + d.mkdir() + for f in td / "x", td / "sub" / "y": + f.touch() + f.chmod(0) + + try: + rmtree(td) + except SkipTest as ex: + self.fail(f"rmtree unexpectedly attempts skip: {ex!r}") + + assert not td.exists() + + @pytest.mark.skipif( + sys.platform == "cygwin", + reason="Cygwin can't set the permissions that make the test meaningful.", + ) + def test_avoids_changing_permissions_outside_tree(self, tmp_path): + # Automatically works on Windows, but on Unix requires either special handling + # or refraining from attempting to fix PermissionError by making chmod calls. + + dir1 = tmp_path / "dir1" + dir1.mkdir() + (dir1 / "file").touch() + (dir1 / "file").chmod(stat.S_IRUSR) + old_mode = (dir1 / "file").stat().st_mode + + dir2 = tmp_path / "dir2" + dir2.mkdir() + (dir2 / "symlink").symlink_to(dir1 / "file") + dir2.chmod(stat.S_IRUSR | stat.S_IXUSR) + + try: + rmtree(dir2) + except PermissionError: + pass # On Unix, dir2 is not writable, so dir2/symlink may not be deleted. 
+ except SkipTest as ex: + self.fail(f"rmtree unexpectedly attempts skip: {ex!r}") + + new_mode = (dir1 / "file").stat().st_mode + assert old_mode == new_mode, f"Should stay {old_mode:#o}, became {new_mode:#o}." + + def _patch_for_wrapping_test(self, mocker, hide_windows_known_errors): + # Access the module through sys.modules so it is unambiguous which module's + # attribute we patch: the original git.util, not git.index.util even though + # git.index.util "replaces" git.util and is what "import git.util" gives us. + mocker.patch.object(sys.modules["git.util"], "HIDE_WINDOWS_KNOWN_ERRORS", hide_windows_known_errors) + + # Mock out common chmod functions to simulate PermissionError the callback can't + # fix. (We leave the corresponding lchmod functions alone. If they're used, it's + # more important we detect any failures from inadequate compatibility checks.) + mocker.patch.object(os, "chmod") + mocker.patch.object(pathlib.Path, "chmod") + + @pytest.mark.skipif( + sys.platform != "win32", + reason="PermissionError is only ever wrapped on Windows", + ) + def test_wraps_perm_error_if_enabled(self, mocker, permission_error_tmpdir): + """rmtree wraps PermissionError on Windows when HIDE_WINDOWS_KNOWN_ERRORS is + true.""" + self._patch_for_wrapping_test(mocker, True) + + with pytest.raises(SkipTest): + rmtree(permission_error_tmpdir) + + @pytest.mark.skipif( + sys.platform == "cygwin", + reason="Cygwin can't set the permissions that make the test meaningful.", + ) + @pytest.mark.parametrize( + "hide_windows_known_errors", + [ + pytest.param(False), + pytest.param(True, marks=pytest.mark.skipif(sys.platform == "win32", reason="We would wrap on Windows")), + ], + ) + def test_does_not_wrap_perm_error_unless_enabled(self, mocker, permission_error_tmpdir, hide_windows_known_errors): + """rmtree does not wrap PermissionError on non-Windows systems or when + HIDE_WINDOWS_KNOWN_ERRORS is false.""" + self._patch_for_wrapping_test(mocker, hide_windows_known_errors) + + with 
pytest.raises(PermissionError): + try: + rmtree(permission_error_tmpdir) + except SkipTest as ex: + pytest.fail(f"rmtree unexpectedly attempts skip: {ex!r}") + + @pytest.mark.parametrize("hide_windows_known_errors", [False, True]) + def test_does_not_wrap_other_errors(self, tmp_path, mocker, hide_windows_known_errors): + # The file is deliberately never created. + file_not_found_tmpdir = tmp_path / "testdir" + + self._patch_for_wrapping_test(mocker, hide_windows_known_errors) + + with pytest.raises(FileNotFoundError): + try: + rmtree(file_not_found_tmpdir) + except SkipTest as ex: + self.fail(f"rmtree unexpectedly attempts skip: {ex!r}") + + +class TestEnvParsing: + """Tests for environment variable parsing logic in :mod:`git.util`.""" + + @staticmethod + def _run_parse(name, value): + command = [ + sys.executable, + "-c", + f"from git.util import {name}; print(repr({name}))", + ] + output = subprocess.check_output( + command, + env=None if value is None else dict(os.environ, **{name: value}), + text=True, + ) + return ast.literal_eval(output) + + @pytest.mark.skipif( + sys.platform != "win32", + reason="These environment variables are only used on Windows.", + ) + @pytest.mark.parametrize( + "env_var_value, expected_truth_value", + [ + (None, True), # When the environment variable is unset. 
+ ("", False), + (" ", False), + ("0", False), + ("1", True), + ("false", False), + ("true", True), + ("False", False), + ("True", True), + ("no", False), + ("yes", True), + ("NO", False), + ("YES", True), + (" no ", False), + (" yes ", True), + ], + ) + @pytest.mark.parametrize( + "name", + [ + "HIDE_WINDOWS_KNOWN_ERRORS", + "HIDE_WINDOWS_FREEZE_ERRORS", + ], + ) + def test_env_vars_for_windows_tests(self, name, env_var_value, expected_truth_value): + actual_parsed_value = self._run_parse(name, env_var_value) + assert actual_parsed_value is expected_truth_value + + +def _xfail_param(*values, **xfail_kwargs): + """Build a pytest.mark.parametrize parameter that carries an xfail mark.""" + return pytest.param(*values, marks=pytest.mark.xfail(**xfail_kwargs)) + _norm_cygpath_pairs = ( - (r"foo\bar", "foo/bar"), - (r"foo/bar", "foo/bar"), - (r"C:\Users", "/cygdrive/c/Users"), - (r"C:\d/e", "/cygdrive/c/d/e"), + (R"foo\bar", "foo/bar"), + (R"foo/bar", "foo/bar"), + (R"C:\Users", "/cygdrive/c/Users"), + (R"C:\d/e", "/cygdrive/c/d/e"), ("C:\\", "/cygdrive/c/"), - (r"\\server\C$\Users", "//server/C$/Users"), - (r"\\server\C$", "//server/C$"), + (R"\\server\C$\Users", "//server/C$/Users"), + (R"\\server\C$", "//server/C$"), ("\\\\server\\c$\\", "//server/c$/"), - (r"\\server\BAR/", "//server/BAR/"), - (r"D:/Apps", "/cygdrive/d/Apps"), - (r"D:/Apps\fOO", "/cygdrive/d/Apps/fOO"), - (r"D:\Apps/123", "/cygdrive/d/Apps/123"), + (R"\\server\BAR/", "//server/BAR/"), + (R"D:/Apps", "/cygdrive/d/Apps"), + (R"D:/Apps\fOO", "/cygdrive/d/Apps/fOO"), + (R"D:\Apps/123", "/cygdrive/d/Apps/123"), ) +"""Path test cases for cygpath and decygpath, other than extended UNC paths.""" _unc_cygpath_pairs = ( - (r"\\?\a:\com", "/cygdrive/a/com"), - (r"\\?\a:/com", "/cygdrive/a/com"), - (r"\\?\UNC\server\D$\Apps", "//server/D$/Apps"), + (R"\\?\a:\com", "/cygdrive/a/com"), + (R"\\?\a:/com", "/cygdrive/a/com"), + (R"\\?\UNC\server\D$\Apps", "//server/D$/Apps"), ) +"""Extended UNC path test cases for 
cygpath.""" + +_cygpath_ok_xfails = { + # From _norm_cygpath_pairs: + (R"C:\Users", "/cygdrive/c/Users"): "/proc/cygdrive/c/Users", + (R"C:\d/e", "/cygdrive/c/d/e"): "/proc/cygdrive/c/d/e", + ("C:\\", "/cygdrive/c/"): "/proc/cygdrive/c/", + (R"\\server\BAR/", "//server/BAR/"): "//server/BAR", + (R"D:/Apps", "/cygdrive/d/Apps"): "/proc/cygdrive/d/Apps", + (R"D:/Apps\fOO", "/cygdrive/d/Apps/fOO"): "/proc/cygdrive/d/Apps/fOO", + (R"D:\Apps/123", "/cygdrive/d/Apps/123"): "/proc/cygdrive/d/Apps/123", + # From _unc_cygpath_pairs: + (R"\\?\a:\com", "/cygdrive/a/com"): "/proc/cygdrive/a/com", + (R"\\?\a:/com", "/cygdrive/a/com"): "/proc/cygdrive/a/com", +} +"""Mapping of expected failures for the test_cygpath_ok test.""" + + +_cygpath_ok_params = [ + ( + _xfail_param(*case, reason=f"Returns: {_cygpath_ok_xfails[case]!r}", raises=AssertionError) + if case in _cygpath_ok_xfails + else case + ) + for case in _norm_cygpath_pairs + _unc_cygpath_pairs +] +"""Parameter sets for the test_cygpath_ok test.""" + + +@pytest.mark.skipif(sys.platform != "cygwin", reason="Paths specifically for Cygwin.") +class TestCygpath: + """Tests for :func:`git.util.cygpath` and :func:`git.util.decygpath`.""" + + @pytest.mark.parametrize("wpath, cpath", _cygpath_ok_params) + def test_cygpath_ok(self, wpath, cpath): + cwpath = cygpath(wpath) + assert cwpath == cpath, wpath + + @pytest.mark.parametrize( + "wpath, cpath", + [ + (R"./bar", "bar"), + _xfail_param(R".\bar", "bar", reason="Returns: './bar'", raises=AssertionError), + (R"../bar", "../bar"), + (R"..\bar", "../bar"), + (R"../bar/.\foo/../chu", "../bar/chu"), + ], + ) + def test_cygpath_norm_ok(self, wpath, cpath): + cwpath = cygpath(wpath) + assert cwpath == (cpath or wpath), wpath + + @pytest.mark.parametrize( + "wpath", + [ + R"C:", + R"C:Relative", + R"D:Apps\123", + R"D:Apps/123", + R"\\?\a:rel", + R"\\share\a:rel", + ], + ) + def test_cygpath_invalids(self, wpath): + cwpath = cygpath(wpath) + assert cwpath == wpath.replace("\\", "/"), 
wpath + @pytest.mark.parametrize("wpath, cpath", _norm_cygpath_pairs) + def test_decygpath(self, wpath, cpath): + wcpath = decygpath(cpath) + assert wcpath == wpath.replace("/", "\\"), cpath -class TestIterableMember(object): - """A member of an iterable list""" +class _Member: + """A member of an IterableList.""" - __slots__ = "name" + __slots__ = ("name",) def __init__(self, name): self.name = name def __repr__(self): - return "TestIterableMember(%r)" % self.name + return f"{type(self).__name__}({self.name!r})" @ddt.ddt class TestUtils(TestBase): - def setup(self): - self.testdict = { - "string": "42", - "int": 42, - "array": [42], - } - - @skipIf(not is_win, "Paths specifically for Windows.") - @ddt.idata(_norm_cygpath_pairs + _unc_cygpath_pairs) - def test_cygpath_ok(self, case): - wpath, cpath = case - cwpath = cygpath(wpath) - self.assertEqual(cwpath, cpath, wpath) - - @skipIf(not is_win, "Paths specifically for Windows.") - @ddt.data( - (r"./bar", "bar"), - (r".\bar", "bar"), - (r"../bar", "../bar"), - (r"..\bar", "../bar"), - (r"../bar/.\foo/../chu", "../bar/chu"), - ) - def test_cygpath_norm_ok(self, case): - wpath, cpath = case - cwpath = cygpath(wpath) - self.assertEqual(cwpath, cpath or wpath, wpath) - - @skipIf(not is_win, "Paths specifically for Windows.") - @ddt.data( - r"C:", - r"C:Relative", - r"D:Apps\123", - r"D:Apps/123", - r"\\?\a:rel", - r"\\share\a:rel", - ) - def test_cygpath_invalids(self, wpath): - cwpath = cygpath(wpath) - self.assertEqual(cwpath, wpath.replace("\\", "/"), wpath) - - @skipIf(not is_win, "Paths specifically for Windows.") - @ddt.idata(_norm_cygpath_pairs) - def test_decygpath(self, case): - wpath, cpath = case - wcpath = decygpath(cpath) - self.assertEqual(wcpath, wpath.replace("/", "\\"), cpath) + """Tests for most utilities in :mod:`git.util`.""" def test_it_should_dashify(self): self.assertEqual("this-is-my-argument", dashify("this_is_my_argument")) self.assertEqual("foo", dashify("foo")) def test_lock_file(self): - 
my_file = tempfile.mktemp() - lock_file = LockFile(my_file) - assert not lock_file._has_lock() - # release lock we don't have - fine - lock_file._release_lock() + with tempfile.TemporaryDirectory() as tdir: + my_file = os.path.join(tdir, "my-lock-file") + lock_file = LockFile(my_file) + assert not lock_file._has_lock() + # Release lock we don't have - fine. + lock_file._release_lock() - # get lock - lock_file._obtain_lock_or_raise() - assert lock_file._has_lock() + # Get lock. + lock_file._obtain_lock_or_raise() + assert lock_file._has_lock() - # concurrent access - other_lock_file = LockFile(my_file) - assert not other_lock_file._has_lock() - self.assertRaises(IOError, other_lock_file._obtain_lock_or_raise) + # Concurrent access. + other_lock_file = LockFile(my_file) + assert not other_lock_file._has_lock() + self.assertRaises(IOError, other_lock_file._obtain_lock_or_raise) - lock_file._release_lock() - assert not lock_file._has_lock() + lock_file._release_lock() + assert not lock_file._has_lock() - other_lock_file._obtain_lock_or_raise() - self.assertRaises(IOError, lock_file._obtain_lock_or_raise) + other_lock_file._obtain_lock_or_raise() + self.assertRaises(IOError, lock_file._obtain_lock_or_raise) - # auto-release on destruction - del other_lock_file - lock_file._obtain_lock_or_raise() - lock_file._release_lock() + # Auto-release on destruction. 
+ del other_lock_file + lock_file._obtain_lock_or_raise() + lock_file._release_lock() - @pytest.mark.xfail( - sys.platform == "cygwin", - reason="Cygwin fails here for some reason, always", - raises=AssertionError, - ) def test_blocking_lock_file(self): - my_file = tempfile.mktemp() - lock_file = BlockingLockFile(my_file) - lock_file._obtain_lock() - - # next one waits for the lock - start = time.time() - wait_time = 0.1 - wait_lock = BlockingLockFile(my_file, 0.05, wait_time) - self.assertRaises(IOError, wait_lock._obtain_lock) - elapsed = time.time() - start + with tempfile.TemporaryDirectory() as tdir: + my_file = os.path.join(tdir, "my-lock-file") + lock_file = BlockingLockFile(my_file) + lock_file._obtain_lock() + + # Next one waits for the lock. + start = time.time() + wait_time = 0.1 + wait_lock = BlockingLockFile(my_file, 0.05, wait_time) + self.assertRaises(IOError, wait_lock._obtain_lock) + elapsed = time.time() - start + extra_time = 0.02 - if is_win: - # for Appveyor - extra_time *= 6 # NOTE: Indeterministic failures here... + if sys.platform in {"win32", "cygwin"}: + extra_time *= 6 # Without this, we get indeterministic failures on Windows. + elif sys.platform == "darwin": + extra_time *= 18 # The situation on macOS is similar, but with more delay. + self.assertLess(elapsed, wait_time + extra_time) def test_user_id(self): self.assertIn("@", get_user_id()) def test_parse_date(self): - # parse_date(from_timestamp()) must return the tuple unchanged + # parse_date(from_timestamp()) must return the tuple unchanged. for timestamp, offset in ( (1522827734, -7200), (1522827734, 0), @@ -190,7 +430,7 @@ class TestUtils(TestBase): ): self.assertEqual(parse_date(from_timestamp(timestamp, offset)), (timestamp, offset)) - # test all supported formats + # Test all supported formats. 
def assert_rval(rval, veri_time, offset=0): self.assertEqual(len(rval), 2) self.assertIsInstance(rval[0], int) @@ -198,7 +438,7 @@ class TestUtils(TestBase): self.assertEqual(rval[0], veri_time) self.assertEqual(rval[1], offset) - # now that we are here, test our conversion functions as well + # Now that we are here, test our conversion functions as well. utctz = altz_to_utctz_str(offset) self.assertIsInstance(utctz, str) self.assertEqual(utctz_to_altz(verify_utctz(utctz)), offset) @@ -211,13 +451,13 @@ class TestUtils(TestBase): iso3 = ("2005.04.07 22:13:11 -0000", 0) alt = ("04/07/2005 22:13:11", 0) alt2 = ("07.04.2005 22:13:11", 0) - veri_time_utc = 1112911991 # the time this represents, in time since epoch, UTC + veri_time_utc = 1112911991 # The time this represents, in time since epoch, UTC. for date, offset in (rfc, iso, iso2, iso3, alt, alt2): assert_rval(parse_date(date), veri_time_utc, offset) # END for each date type - # and failure - self.assertRaises(ValueError, parse_date, datetime.now()) # non-aware datetime + # ...and failure. + self.assertRaises(ValueError, parse_date, datetime.now()) # Non-aware datetime. self.assertRaises(ValueError, parse_date, "invalid format") self.assertRaises(ValueError, parse_date, "123456789 -02000") self.assertRaises(ValueError, parse_date, " 123456789 -0200") @@ -226,7 +466,7 @@ class TestUtils(TestBase): for cr in (None, self.rorepo.config_reader()): self.assertIsInstance(Actor.committer(cr), Actor) self.assertIsInstance(Actor.author(cr), Actor) - # END assure config reader is handled + # END ensure config reader is handled @with_rw_repo("HEAD") @mock.patch("getpass.getuser") @@ -266,8 +506,9 @@ class TestUtils(TestBase): mock_get_uid.return_value = "user" committer = Actor.committer(None) author = Actor.author(None) - # We can't test with `self.rorepo.config_reader()` here, as the uuid laziness - # depends on whether the user running the test has their global user.name config set. 
+ # We can't test with `self.rorepo.config_reader()` here, as the UUID laziness + # depends on whether the user running the test has their global user.name config + # set. self.assertEqual(committer.name, "user") self.assertTrue(committer.email.startswith("user@")) self.assertEqual(author.name, "user") @@ -283,44 +524,47 @@ class TestUtils(TestBase): Actor("name last another", "some-very-long-email@example.com"), ) - @ddt.data(("name", ""), ("name", "prefix_")) + @ddt.data( + ("name", ""), + ("name", "prefix_"), + ) def test_iterable_list(self, case): name, prefix = case ilist = IterableList(name, prefix) name1 = "one" name2 = "two" - m1 = TestIterableMember(prefix + name1) - m2 = TestIterableMember(prefix + name2) + m1 = _Member(prefix + name1) + m2 = _Member(prefix + name2) ilist.extend((m1, m2)) self.assertEqual(len(ilist), 2) - # contains works with name and identity + # Contains works with name and identity. self.assertIn(name1, ilist) self.assertIn(name2, ilist) self.assertIn(m2, ilist) self.assertIn(m2, ilist) self.assertNotIn("invalid", ilist) - # with string index + # With string index. self.assertIs(ilist[name1], m1) self.assertIs(ilist[name2], m2) - # with int index + # With int index. self.assertIs(ilist[0], m1) self.assertIs(ilist[1], m2) - # with getattr + # With getattr. self.assertIs(ilist.one, m1) self.assertIs(ilist.two, m2) - # test exceptions + # Test exceptions. self.assertRaises(AttributeError, getattr, ilist, "something") self.assertRaises(IndexError, ilist.__getitem__, "something") - # delete by name and index + # Delete by name and index. self.assertRaises(IndexError, ilist.__delitem__, "something") del ilist[name2] self.assertEqual(len(ilist), 1) @@ -355,21 +599,21 @@ class TestUtils(TestBase): self.assertEqual(altz_to_utctz_str(-59), "+0000") def test_from_timestamp(self): - # Correct offset: UTC+2, should return datetime + tzoffset(+2) + # Correct offset: UTC+2, should return datetime + tzoffset(+2). 
altz = utctz_to_altz("+0200") self.assertEqual( datetime.fromtimestamp(1522827734, tzoffset(altz)), from_timestamp(1522827734, altz), ) - # Wrong offset: UTC+58, should return datetime + tzoffset(UTC) + # Wrong offset: UTC+58, should return datetime + tzoffset(UTC). altz = utctz_to_altz("+5800") self.assertEqual( datetime.fromtimestamp(1522827734, tzoffset(0)), from_timestamp(1522827734, altz), ) - # Wrong offset: UTC-9000, should return datetime + tzoffset(UTC) + # Wrong offset: UTC-9000, should return datetime + tzoffset(UTC). altz = utctz_to_altz("-9000") self.assertEqual( datetime.fromtimestamp(1522827734, tzoffset(0)), @@ -399,7 +643,7 @@ class TestUtils(TestBase): redacted_cmd_1 = remove_password_if_present(cmd_1) assert username not in " ".join(redacted_cmd_1) assert password not in " ".join(redacted_cmd_1) - # Check that we use a copy + # Check that we use a copy. assert cmd_1 is not redacted_cmd_1 assert username in " ".join(cmd_1) assert password in " ".join(cmd_1) diff --git a/test/tstrunner.py b/test/tstrunner.py index 441050c..fc9a59c 100644 --- a/test/tstrunner.py +++ b/test/tstrunner.py @@ -1,3 +1,8 @@ +# This module is part of GitPython and is released under the +# 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ + +"""Hook for MonkeyType (see PR #1188).""" + import unittest loader = unittest.TestLoader() diff --git a/tox.ini b/tox.ini index 82a41e2..fc62fa5 100644 --- a/tox.ini +++ b/tox.ini @@ -1,6 +1,6 @@ [tox] requires = tox>=4 -env_list = py{37,38,39,310,311,312}, lint, mypy, black +env_list = py{37,38,39,310,311,312}, ruff, format, mypy, html, misc [testenv] description = Run unit tests @@ -9,27 +9,42 @@ extras = test pass_env = SSH_* commands = pytest --color=yes {posargs} -[testenv:lint] -description = Lint via pre-commit -base_python = py39 -commands = pre-commit run --all-files +[testenv:ruff] +description = Lint with Ruff +base_python = py{39,310,311,312,38,37} +deps = ruff +set_env = + CLICOLOR_FORCE = 1 # Set NO_COLOR 
to override this. +commands = ruff check . + +[testenv:format] +description = Check formatting with Ruff +base_python = py{39,310,311,312,38,37} +deps = ruff +set_env = + CLICOLOR_FORCE = 1 # Set NO_COLOR to override this. +commands = ruff format --check . [testenv:mypy] description = Typecheck with mypy -base_python = py39 -commands = mypy -p git +base_python = py{39,310,311,312,38,37} +set_env = + MYPY_FORCE_COLOR = 1 +commands = mypy ignore_outcome = true -[testenv:black] -description = Check style with black -base_python = py39 -commands = black --check --diff . - -# Run "tox -e html" for this. It is deliberately excluded from env_list, as -# unlike the other environments, this one writes outside the .tox/ directory. [testenv:html] description = Build HTML documentation -base_python = py39 -deps = -r doc/requirements.txt +base_python = py{39,310,311,312,38,37} +extras = doc allowlist_externals = make -commands = make -C doc html +commands = + make BUILDDIR={env_tmp_dir}/doc/build -C doc clean + make BUILDDIR={env_tmp_dir}/doc/build -C doc html + +[testenv:misc] +description = Run other checks via pre-commit +base_python = py{39,310,311,312,38,37} +set_env = + SKIP = ruff-format,ruff +commands = pre-commit run --all-files -- GitLab From 7bda8b1358fc8691ba357f27bf11b77a25702bda Mon Sep 17 00:00:00 2001 From: Geovanna Maciel <geoojv@gmail.com> Date: Thu, 30 Jan 2025 09:59:22 -0300 Subject: [PATCH 2/3] Update changelog for 3.1.44-1 release --- debian/changelog | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/debian/changelog b/debian/changelog index 0b802c2..7e8b0a2 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,11 @@ +python-git (3.1.44-1) UNRELEASED; urgency=medium + + * Team upload. + * New upstream version 3.1.44 + * Update Standards-Version to 4.7.0 + + -- Geovanna Maciel <geoojv@gmail.com> Thu, 30 Jan 2025 09:59:22 -0300 + python-git (3.1.37-3) unstable; urgency=medium * Team upload. 
-- GitLab From 7d3017c53327f5f7d13252b6bb164de810128606 Mon Sep 17 00:00:00 2001 From: Geovanna Maciel <geoojv@gmail.com> Date: Thu, 30 Jan 2025 10:13:02 -0300 Subject: [PATCH 3/3] Update debian/control Standards-Version --- debian/control | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debian/control b/debian/control index bc806a5..5bd2689 100644 --- a/debian/control +++ b/debian/control @@ -17,7 +17,7 @@ Build-Depends: debhelper-compat (= 13), python3-pytest <!nocheck>, python3-ddt Build-Depends-Indep: python3-gitdb, -Standards-Version: 4.6.2 +Standards-Version: 4.7.0 Vcs-Git: https://salsa.debian.org/python-team/packages/python-git.git Vcs-Browser: https://salsa.debian.org/python-team/packages/python-git Homepage: https://github.com/gitpython-developers/GitPython -- GitLab