Executing GIT Commands from .NET using a Process

I recently needed some metadata off of git repositories, like a list of indexed files. I initially tried to use LibGit2Sharp, but it often failed to return file information for things like merge commits.

So I just executed the git executable from .NET using a Process, and called it a GitExecutor for lack of a better name. Maybe it is useful for someone else attempting to do the same, so I am sharing it here.

Table of contents

Executing Git Commands: The GitExecutor

We start by defining a GitException, which will be thrown, when an error occurs:

// Licensed under the MIT license. See LICENSE file in the project root for full license information.

namespace ElasticsearchCodeSearch.Indexer.Git.Exceptions
{
    public class GitException : Exception
    {
        public readonly int ExitCode;
        public readonly string Errors;

        public GitException(int exitCode, string errors) 
        { 
            ExitCode = exitCode;
            Errors = errors;
        }
    }
}

And then we can use the System.Diagnostics.Process class to execute the git executable and return the results:

// Licensed under the MIT license. See LICENSE file in the project root for full license information.

using ElasticsearchCodeSearch.Indexer.Git.Exceptions;
using ElasticsearchCodeSearch.Shared.Logging;
using Microsoft.Extensions.Logging;
using System.Diagnostics;
using System.Text;

namespace ElasticsearchCodeSearch.Indexer.Git
{
    public class GitExecutor
    {
        private readonly ILogger<GitExecutor> _logger;

        public GitExecutor(ILogger<GitExecutor> logger) 
        {
            _logger = logger;
        }

        public async Task Clone(string repository_url, string repository_directory, CancellationToken cancellationToken)
        {
            await RunAsync($"clone {repository_url} {repository_directory}", string.Empty, cancellationToken);
        }

        public async Task<string> SHA1(string repository_directory, string path, CancellationToken cancellationToken)
        {
            var result = await RunAsync($"ls-files -s {path}", repository_directory, cancellationToken);

            return result.Split(" ").Skip(1).First();
        }

        public async Task<string> CommitHash(string repository_directory, string path, CancellationToken cancellationToken)
        {
            var result = await RunAsync($"log --pretty=format:\"%H\" -n 1 -- {path}", repository_directory, cancellationToken);

            return result;
        }

        public async Task<DateTime> LatestCommitDate(string repository_directory, string path, CancellationToken cancellationToken)
        {
            var result = await RunAsync($" log -1  --date=iso-strict --format=\"%ad\" -- {path}", repository_directory, cancellationToken);

            if(DateTime.TryParse(result, out var date))
            {
                return date;
            }

            return default;
        }

        public async Task<string[]> ListFiles(string repository_directory, CancellationToken cancellationToken)
        {
            var result = await RunAsync($"ls-files", repository_directory, cancellationToken);

            var files = result
                .Split("\r\n")
                .ToArray();

            return files;
        }

        public async Task<string> RunAsync(string arguments, string workingDirectory, CancellationToken cancellationToken)
        {
            var result = await RunProcessAsync("git", arguments, workingDirectory, cancellationToken);

            if(result.ExitCode != 0)
            {
                throw new GitException(result.ExitCode, result.Errors);
            }

            return result.Output;
        }

        private async Task<(int ExitCode, string Output, string Errors)> RunProcessAsync(string application, string arguments, string workingDirectory, CancellationToken cancellationToken)
        {
            using (var process = new Process())
            {
                process.StartInfo = new ProcessStartInfo
                {
                    CreateNoWindow = true,
                    UseShellExecute = false,
                    RedirectStandardError = true,
                    RedirectStandardOutput = true,
                    FileName = application,
                    Arguments = arguments,
                    WorkingDirectory = workingDirectory,
                };

                var outputBuilder = new StringBuilder();
                var errorsBuilder = new StringBuilder();

                process.OutputDataReceived += (_, args) => outputBuilder.AppendLine(args.Data);
                process.ErrorDataReceived += (_, args) => errorsBuilder.AppendLine(args.Data);

                process.Start();

                process.BeginOutputReadLine();
                process.BeginErrorReadLine();

                await process.WaitForExitAsync(cancellationToken);

                var exitCode = process.ExitCode;
                var output = outputBuilder.ToString().Trim();
                var errors = errorsBuilder.ToString().Trim();

                return (exitCode, output, errors);
            }
        }
    }
}

In the code I start by adding it as a Singleton in the Starup:

builder.Services.AddSingleton<GitExecutor>();

And then use it like this:

// Licensed under the MIT license. See LICENSE file in the project root for full license information.

// ...

namespace ElasticsearchCodeSearch.Indexer.Services
{
    /// <summary>
    /// Git Indexer.
    /// </summary>
    public class GitIndexerService
    {
        private readonly GitExecutor _git;

        public GitIndexerService(GitExecutor git)
        {
            _git = git;
        }

        public async ValueTask IndexRepositoryAsync(RepositoryMetadataDto repositoryMetadata, CancellationToken cancellationToken)
        {
            _logger.TraceMethodEntry();

            try
            {
                // ...

                await _git
                    .Clone(repositoryMetadata.CloneUrl, workingDirectory, cancellationToken)
                    .ConfigureAwait(false);

                // Get the list of allowed files, by matching against allowed extensions (.c, .cpp, ...)
                // and allowed filenames (.gitignore, README, ...). We don't want to parse binary data.
                var batches =  (await _git.ListFiles(workingDirectory, cancellationToken).ConfigureAwait(false))
                    .Where(filename => IsAllowedFile(filename, allowedExtensions, allowedFilenames))
                    .Chunk(_options.BatchSize);

                var parallelOptions = new ParallelOptions()
                {
                    MaxDegreeOfParallelism = _options.MaxParallelBulkRequests,
                    CancellationToken = cancellationToken
                };

                await Parallel
                    .ForEachAsync(source: batches, parallelOptions: parallelOptions, body: (source, cancellationToken) => IndexDocumentsAsync(repositoryMetadata, source, cancellationToken))
                    .ConfigureAwait(false);
            } 
            catch(Exception e)
            {
                _logger.LogError(e, "Indexing Repository '{Repository}' failed", repositoryMetadata.FullName);

                throw;
            } 
            finally
            {
                // ...
            }
        }

        // ...

    ]
}