/*
 * Copyright 2025 Bloomberg Finance LP
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <buildboxcommon_ociclient.h>

#include <buildboxcommon_assetclient.h>
#include <buildboxcommon_casclient.h>
#include <buildboxcommon_digestgenerator.h>
#include <buildboxcommon_exception.h>
#include <buildboxcommon_fileutils.h>
#include <buildboxcommon_httpclient.h>
#include <buildboxcommon_logging.h>
#include <buildboxcommon_mergeutil.h>
#include <buildboxcommon_ocimanifest.h>
#include <buildboxcommon_stringutils.h>
#include <buildboxcommon_systemutils.h>
#include <buildboxcommon_temporarydirectory.h>
#include <buildboxcommon_temporaryfile.h>

#include <array>
#include <cerrno>
#include <cstring>
#include <fcntl.h>
#include <nlohmann/json.hpp>
#include <regex>
#include <spawn.h>
#include <string_view>
#include <sys/wait.h>
#include <ThreadPool.h>
#include <unistd.h>

// `environ` is declared here so it can be handed to posix_spawn() below,
// which makes the spawned tar process inherit this process's environment.
extern char **environ;

namespace buildboxcommon {

// Convenience constructor: no explicit tar binary and no auth token file.
// Delegates to the full constructor, which performs the argument validation.
OciClient::OciClient(const std::shared_ptr<HTTPClient> &httpClient,
                     const std::shared_ptr<CASClient> &casClient,
                     const std::shared_ptr<AssetClient> &assetClient)
    : OciClient(httpClient, casClient, assetClient,
                /* tarBinaryPath */ std::nullopt,
                /* authTokenPath */ std::nullopt)
{
}

// Full constructor.
//
// tarBinaryPath: when set, used as the tar executable instead of resolving
//                "tar" from PATH (see getTarCommand()).
// authTokenPath: when set, registry calls send "Authorization: Bearer <token>"
//                with the token read from this file (see readAuthToken()).
//
// Throws OciRegistryException if the CAS client or the asset client is null.
// The HTTP client is stored as given and is not validated here.
OciClient::OciClient(const std::shared_ptr<HTTPClient> &httpClient,
                     const std::shared_ptr<CASClient> &casClient,
                     const std::shared_ptr<AssetClient> &assetClient,
                     const std::optional<std::string> &tarBinaryPath,
                     const std::optional<std::string> &authTokenPath)
    : d_httpClient(httpClient), d_casClient(casClient),
      d_assetClient(assetClient), d_tarBinaryPath(tarBinaryPath),
      d_authTokenPath(authTokenPath)
{
    // The CAS client is checked first so that it is the one reported when
    // both mandatory collaborators are missing.
    if (d_casClient == nullptr) {
        BUILDBOXCOMMON_THROW_EXCEPTION(OciRegistryException,
                                       "CAS client cannot be null");
    }

    if (d_assetClient == nullptr) {
        BUILDBOXCOMMON_THROW_EXCEPTION(OciRegistryException,
                                       "Asset client cannot be null");
    }
}

// Parse a Docker/OCI image reference of the form
//
//     [docker://][registry/][namespace/...]name[:tag][@sha256:<hex>]
//
// into its registry, repository ("namespace/.../name"), tag and digest.
//
// - Leading/trailing whitespace and an optional "docker://" prefix are
//   stripped.
// - The first path segment is only treated as a registry when it contains a
//   '.'; otherwise it is folded into the repository path and
//   BUILDBOXCOMMON_DEFAULT_OCI_REGISTRY is used.
// - When a digest is present the tag is dropped; when neither tag nor digest
//   is present the tag defaults to "latest".
//
// Throws std::invalid_argument for empty or malformed references and for
// digests that do not start with "sha256:".
OciUriComponents OciClient::parseOCIuri(const std::string &originalUri)
{
    std::string uri = StringUtils::trim(originalUri);
    if (uri.empty()) {
        throw std::invalid_argument("Empty image URI");
    }

    // Remove docker:// prefix if present
    constexpr std::string_view dockerPrefix = "docker://";
    if (uri.starts_with(dockerPrefix)) {
        uri.erase(0, dockerPrefix.length());
    }

    // The pattern below has the following parts:
    //   (?:([^/]+)/)?      GROUP 1: optional first path segment (candidate
    //                      registry), e.g. "docker.io", "registry.example.com"
    //   ((?:[^/]+/)*)      GROUP 2: zero or more further "segment/" parts,
    //                      captured as ONE string, e.g. "myorg/myproject/".
    //                      The capture must wrap the whole repetition: a
    //                      capture group placed inside the '*' only retains
    //                      its last iteration, which would silently drop the
    //                      leading segments of a multi-level namespace.
    //   ([^:@/]+)          GROUP 3: required image name (no ':', '@', '/')
    //   (?::([^@]+))?      GROUP 4: optional tag (after ':', no '@')
    //   (?:@(.+))?         GROUP 5: optional digest (after '@')
    constexpr int REGISTRY_IDX = 1;
    constexpr int NAMESPACE_IDX = 2;
    constexpr int NAME_IDX = 3;
    constexpr int TAG_IDX = 4;
    constexpr int DIGEST_IDX = 5;

    // Compiling a std::regex is expensive; the pattern is constant, so build
    // it once.
    static const std::regex re(
        "^(?:([^/]+)/)?((?:[^/]+/)*)([^:@/]+)(?::([^@]+))?(?:@(.+))?$");

    std::smatch m;
    if (!std::regex_match(uri, m, re)) {
        throw std::invalid_argument("Malformed image URI: " + uri);
    }

    std::string registry =
        m[REGISTRY_IDX].matched ? m[REGISTRY_IDX].str() : "";
    std::string ns = m[NAMESPACE_IDX].matched ? m[NAMESPACE_IDX].str() : "";
    const std::string name = m[NAME_IDX].matched ? m[NAME_IDX].str() : "";
    const std::string tag = m[TAG_IDX].matched ? m[TAG_IDX].str() : "";
    const std::string digest =
        m[DIGEST_IDX].matched ? m[DIGEST_IDX].str() : "";

    // Check if the first part is a valid registry (contains a dot).
    // If not, treat it as part of the repository path,
    // e.g. myteam/myapp@sha256:...
    if (registry.empty() || (registry.find('.') == std::string::npos)) {
        if (!registry.empty()) {
            // Move the would-be registry segment to the front of the
            // namespace
            ns = registry + "/" + ns;
        }
        // Fall back to the default registry
        registry = BUILDBOXCOMMON_DEFAULT_OCI_REGISTRY;
    }

    // Validate name
    if (name.empty()) {
        throw std::invalid_argument("Missing repository name in URI: " + uri);
    }

    std::string repository = ns + name;
    if (!repository.empty() && repository.back() == '/') {
        repository.pop_back();
    }

    std::optional<std::string> optTag;
    std::optional<std::string> optDigest;
    if (!digest.empty()) {
        // A digest takes precedence over any tag that was also supplied
        if (digest.rfind("sha256:", 0) == 0) {
            optDigest = digest.substr(SHA256_PREFIX_LEN); // skip 'sha256:'
        }
        else {
            BUILDBOXCOMMON_THROW_EXCEPTION(
                std::invalid_argument, "Invalid digest format: " << digest);
        }
    }
    else if (!tag.empty()) {
        optTag = tag;
    }
    else {
        optTag = "latest";
    }

    return {registry, repository, optTag, optDigest};
}

// Returns true when the hash of `blob`, as computed by DigestGenerator,
// equals `expectedSha256` (hex hash without the "sha256:" prefix).
//
// Throws OciRegistryException when the configured digest function is not
// SHA256, since the comparison would be meaningless in that case.
bool OciClient::verifyContent(const std::string &blob,
                              const std::string &expectedSha256)
{
    const bool usingSha256 =
        DigestGenerator::digestFunction() ==
        DigestFunction_Value::DigestFunction_Value_SHA256;
    if (!usingSha256) {
        BUILDBOXCOMMON_THROW_EXCEPTION(
            OciRegistryException,
            "Only SHA256 digest function is supported with OCI registries");
    }

    return DigestGenerator::hash(blob).hash() == expectedSha256;
}

// Fetch the image manifest described by `components` from the registry.
//
// If the registry answers with a manifest list / image index, the single
// entry whose platform matches `platformOs` / `platformArchitecture` is
// selected and its manifest is fetched with a second request.
//
// The returned manifest has d_registryUri / d_repository filled in from
// `components` and d_reference set to "sha256:<digest of the manifest body>".
//
// Throws:
//   OciInvalidUriException          - no digest and the tag is missing or
//                                     "latest"
//   OciUnsupportedPlatformException - index has no entry for the platform
//   OciRegistryApiCallException     - request failure, unparsable/unsupported
//                                     response, ambiguous platform entry, or
//                                     content that does not match the digest
//                                     it was requested by
OciManifest OciClient::getOCIManifest(const OciUriComponents &components,
                                      const std::string &platformOs,
                                      const std::string &platformArchitecture)
{
    BUILDBOX_LOG_DEBUG("Fetching OCI manifest for components");

    // We reject 'latest' and untagged URIs as they end up in cache keys and
    // are not deterministic. Custom tags are non-deterministic too, but the
    // user is being explicit.
    if (!components.sha256Digest.has_value() &&
        (!components.tag.has_value() || components.tag.value() == "latest")) {
        BUILDBOXCOMMON_THROW_EXCEPTION(
            OciInvalidUriException,
            "BuildBox rejects 'latest' tags and untagged URIs as they are "
            "not deterministic and a risk for caching. Use explicit tags or "
            "SHA256 digests.");
    }

    std::string reference;
    if (components.sha256Digest.has_value()) {
        reference = "sha256:" + components.sha256Digest.value();
    }
    else {
        reference = components.tag.value();
    }

    BUILDBOX_LOG_INFO("Fetching OCI manifest from registry "
                      << components.registry << " for repository "
                      << components.repository << " with reference "
                      << reference);

    // A manifest requested by digest is content-addressed: whatever comes
    // back must hash to exactly that digest, otherwise the registry (or
    // something in between) substituted different content.
    // `actualSha256` is the bare hex hash reported by makeRegistryApiCall,
    // `requestedDigest` carries the "sha256:" prefix.
    const auto requireDigest = [](const std::string &actualSha256,
                                  const std::string &requestedDigest) {
        if ("sha256:" + actualSha256 != requestedDigest) {
            BUILDBOXCOMMON_THROW_EXCEPTION(
                OciRegistryApiCallException,
                "Content digest mismatch: requested manifest "
                    << requestedDigest
                    << " but registry returned content with digest sha256:"
                    << actualSha256);
        }
    };

    // {registry}/v2/{repository}/manifests/{reference}
    const std::vector<std::string> pathParts = {
        components.registry, "v2", components.repository, "manifests",
        reference};

    std::string initialDigest;
    HTTPResponse response =
        makeRegistryApiCall(&initialDigest, MANIFEST_MEDIA_TYPE, pathParts);

    BUILDBOX_LOG_DEBUG("Received manifest response with status code: "
                       << response.d_statusCode
                       << " and body:" << response.d_body);

    if (components.sha256Digest.has_value()) {
        // The caller pinned a digest; make sure that is what we received.
        requireDigest(initialDigest, reference);
    }

    // Parse initial response to determine media type
    nlohmann::json responseJson;
    try {
        responseJson = nlohmann::json::parse(response.d_body);
    }
    catch (const nlohmann::json::exception &e) {
        BUILDBOXCOMMON_THROW_EXCEPTION(
            OciRegistryApiCallException,
            "Failed to parse initial response JSON: " << e.what());
    }

    if (!responseJson.contains("mediaType")) {
        BUILDBOXCOMMON_THROW_EXCEPTION(
            OciRegistryApiCallException,
            "Missing mediaType field in manifest response");
    }
    const auto &mediaTypeNode = responseJson.at("mediaType");
    if (!mediaTypeNode.is_string()) {
        // Report this like every other malformed response instead of
        // letting a raw JSON type error escape.
        BUILDBOXCOMMON_THROW_EXCEPTION(
            OciRegistryApiCallException,
            "Invalid mediaType field in manifest response: expected a "
            "string");
    }
    const std::string mediaType = mediaTypeNode.get<std::string>();

    std::string finalDigest;
    std::string manifestContent;

    // Check if this is a manifest list/index
    if (mediaType == OCI_IMAGE_INDEX_MEDIA_TYPE ||
        mediaType == DOCKER_MANIFEST_LIST_MEDIA_TYPE) {
        BUILDBOX_LOG_DEBUG("Received manifest list/index, extracting "
                           << platformOs << "/" << platformArchitecture
                           << " manifest");

        OciManifestIndex manifestIndex;
        try {
            manifestIndex = OciManifestIndex::fromJson(responseJson);
        }
        catch (const std::exception &e) {
            BUILDBOXCOMMON_THROW_EXCEPTION(
                OciRegistryApiCallException,
                "Failed to parse manifest index: " << e.what());
        }

        // Set registry metadata for the manifest index
        manifestIndex.d_registryUri = components.registry;
        manifestIndex.d_repository = components.repository;
        manifestIndex.d_reference = "sha256:" + initialDigest;

        // Find exactly one entry matching the specified platform
        std::vector<const OciManifestIndexEntry *> platformEntries;
        for (const auto &entry : manifestIndex.d_manifests) {
            if (entry.d_platform.d_os == platformOs &&
                entry.d_platform.d_architecture == platformArchitecture) {
                platformEntries.push_back(&entry);
            }
        }

        if (platformEntries.empty()) {
            BUILDBOXCOMMON_THROW_EXCEPTION(
                OciUnsupportedPlatformException,
                "No " << platformOs << "/" << platformArchitecture
                      << " platform found in manifest list");
        }

        if (platformEntries.size() > 1) {
            BUILDBOXCOMMON_THROW_EXCEPTION(
                OciRegistryApiCallException,
                "Multiple " << platformOs << "/" << platformArchitecture
                            << " entries found in manifest list");
        }

        const std::string &entryDigest = platformEntries[0]->d_digest;

        // Validate digest format (should be sha256:...)
        if (entryDigest.rfind("sha256:", 0) != 0) {
            BUILDBOXCOMMON_THROW_EXCEPTION(
                OciRegistryApiCallException,
                "Invalid digest format in manifest list entry: "
                    << entryDigest);
        }

        // Make second HTTP call to fetch the actual manifest
        const std::vector<std::string> manifestPathParts = {
            components.registry, "v2", components.repository, "manifests",
            entryDigest};

        std::string manifestDigest;
        HTTPResponse manifestResponse = makeRegistryApiCall(
            &manifestDigest, MANIFEST_MEDIA_TYPE, manifestPathParts);

        // The index named this manifest by digest; the body must match it.
        requireDigest(manifestDigest, entryDigest);

        manifestContent = manifestResponse.d_body;
        finalDigest =
            manifestDigest; // Use the verified digest from the API call
    }
    else if (mediaType == OCI_MANIFEST_MEDIA_TYPE ||
             mediaType == DOCKER_MANIFEST_MEDIA_TYPE) {
        // This is a regular manifest
        manifestContent = response.d_body;
        finalDigest =
            initialDigest; // Use the verified digest from the API call
    }
    else {
        BUILDBOXCOMMON_THROW_EXCEPTION(
            OciRegistryApiCallException,
            "Unsupported manifest media type: "
                << mediaType << ". Expected manifest or manifest list/index");
    }

    OciManifest manifest;
    try {
        auto json = nlohmann::json::parse(manifestContent);
        manifest = OciManifest::fromJson(json);
    }
    catch (const nlohmann::json::exception &e) {
        BUILDBOXCOMMON_THROW_EXCEPTION(
            OciRegistryApiCallException,
            "Failed to parse manifest JSON: " << e.what());
    }

    // Check media type to ensure it's a supported manifest type. This also
    // covers the manifest fetched through an index, whose own media type has
    // not been inspected yet.
    if (manifest.d_mediaType != OCI_MANIFEST_MEDIA_TYPE &&
        manifest.d_mediaType != DOCKER_MANIFEST_MEDIA_TYPE) {
        BUILDBOXCOMMON_THROW_EXCEPTION(OciRegistryApiCallException,
                                       "Unexpected manifest media type: "
                                           << manifest.d_mediaType
                                           << ". Expected either "
                                           << OCI_MANIFEST_MEDIA_TYPE << " or "
                                           << DOCKER_MANIFEST_MEDIA_TYPE);
    }

    manifest.d_registryUri = components.registry;
    manifest.d_repository = components.repository;
    // Always record the content digest, even when the image was requested by
    // tag, so downstream references are immutable.
    manifest.d_reference = "sha256:" + finalDigest;

    return manifest;
}

// Perform a GET against the registry and return the response.
//
// verifiedDigest: optional out-parameter. When non-null it receives the bare
//                 hex SHA256 of the response body (no "sha256:" prefix). If
//                 the response carries a docker-content-digest header, the
//                 body is verified against it first.
// mediaType:      value for the "Accept" header; omitted when empty.
// pathParts:      joined with "/" to form the request URI.
//
// Throws OciRegistryApiCallException on HTTP errors, non-success status
// codes, malformed digest headers and digest mismatches, and
// OciRegistryException when no HTTP client is available, when the auth token
// cannot be read, or when the configured digest function is not SHA256.
HTTPResponse
OciClient::makeRegistryApiCall(std::string *verifiedDigest,
                               const std::string &mediaType,
                               const std::vector<std::string> &pathParts) const
{
    // The constructor does not validate the HTTP client, so guard here
    // rather than dereferencing a null pointer.
    if (!d_httpClient) {
        BUILDBOXCOMMON_THROW_EXCEPTION(OciRegistryException,
                                       "HTTP client cannot be null");
    }

    // Create the API URI by joining path parts
    const std::string apiUri = StringUtils::join(pathParts, "/");

    HTTPClient::HeaderMap headers;
    if (!mediaType.empty()) {
        headers["Accept"] = mediaType;
    }
    // Use Bearer token auth if configured, otherwise fall back to basic auth
    if (d_authTokenPath.has_value()) {
        const std::string token = readAuthToken();
        headers["Authorization"] = "Bearer " + token;
    }
    else {
        // "Basic Og==" is the Base64 encoding of ":" (empty username and empty
        // password)
        headers["Authorization"] = "Basic Og==";
    }

    HTTPResponse response;
    try {
        response = d_httpClient->get(apiUri, headers);

        if (!response.isSuccess()) {
            BUILDBOXCOMMON_THROW_EXCEPTION(
                OciRegistryApiCallException,
                "Registry API call failed with status code: "
                    << response.d_statusCode);
        }

        // Extract and verify digest from docker-content-digest header if
        // present. The OCI spec recommends not depending on this header but
        // it is still used by all relevant registries.
        if (verifiedDigest != nullptr) {
            // Only these two spellings of the header name are looked up.
            auto digestHeaderIt =
                response.d_headers.find("docker-content-digest");
            if (digestHeaderIt == response.d_headers.end()) {
                digestHeaderIt =
                    response.d_headers.find("Docker-Content-Digest");
            }

            if (digestHeaderIt != response.d_headers.end()) {
                std::string headerDigest = digestHeaderIt->second;

                // Remove "sha256:" prefix; any other algorithm is rejected
                if (headerDigest.rfind("sha256:", 0) == 0) {
                    headerDigest = headerDigest.substr(SHA256_PREFIX_LEN);
                }
                else {
                    BUILDBOXCOMMON_THROW_EXCEPTION(
                        OciRegistryApiCallException,
                        "Invalid digest format in header: " << headerDigest);
                }

                // Verify the content matches the digest from header
                if (!verifyContent(response.d_body, headerDigest)) {
                    BUILDBOXCOMMON_THROW_EXCEPTION(
                        OciRegistryApiCallException,
                        "Content digest mismatch: manifest content does not "
                        "match "
                        "docker-content-digest header value "
                            << headerDigest);
                }

                *verifiedDigest = headerDigest;
            }
            else {
                // If no header present, calculate digest from content, this is
                // less secure but is compliant with the OCI spec.
                //
                // Callers label the result as "sha256:<hash>", so refuse to
                // hash with any other digest function (verifyContent()
                // enforces the same on the header path).
                if (DigestGenerator::digestFunction() !=
                    DigestFunction_Value::DigestFunction_Value_SHA256) {
                    BUILDBOXCOMMON_THROW_EXCEPTION(
                        OciRegistryException,
                        "Only SHA256 digest function is supported with OCI "
                        "registries");
                }

                const Digest calculatedDigest =
                    DigestGenerator::hash(response.d_body);
                *verifiedDigest = calculatedDigest.hash();
            }
        }
    }
    catch (const HTTPException &e) {
        BUILDBOXCOMMON_THROW_EXCEPTION(
            OciRegistryApiCallException,
            "HTTP error during registry API call: " << e.what());
    }

    return response;
}

// Perform a streaming GET against the registry, handing each received chunk
// to `callback`.
//
// pathParts: joined with "/" to form the request URI.
// callback:  forwarded to HTTPClient::streamDownload.
//
// Throws OciRegistryApiCallException on HTTP errors and non-success status
// codes, and OciRegistryException when no HTTP client is available or the
// auth token cannot be read.
HTTPResponse
OciClient::streamRegistryApiCall(const std::vector<std::string> &pathParts,
                                 const StreamCallback &callback) const
{
    // The constructor does not validate the HTTP client, so guard here
    // rather than dereferencing a null pointer.
    if (!d_httpClient) {
        BUILDBOXCOMMON_THROW_EXCEPTION(OciRegistryException,
                                       "HTTP client cannot be null");
    }

    const std::string apiUri = StringUtils::join(pathParts, "/");

    // Set up headers
    HTTPClient::HeaderMap headers;
    // Use Bearer token auth if configured, otherwise fall back to basic auth
    if (d_authTokenPath.has_value()) {
        const std::string token = readAuthToken();
        headers["Authorization"] = "Bearer " + token;
    }
    else {
        // "Basic Og==" is the Base64 encoding of ":" (empty username and empty
        // password)
        headers["Authorization"] = "Basic Og==";
    }

    // Make the streaming HTTP request
    HTTPResponse response;
    try {
        response = d_httpClient->streamDownload(apiUri, headers, callback);

        // Check if the request was successful
        if (!response.isSuccess()) {
            BUILDBOXCOMMON_THROW_EXCEPTION(
                OciRegistryApiCallException,
                "Registry API call failed with status code: "
                    << response.d_statusCode);
        }
    }
    catch (const HTTPException &e) {
        BUILDBOXCOMMON_THROW_EXCEPTION(
            OciRegistryApiCallException,
            "HTTP error during registry API call: " << e.what());
    }

    return response;
}

// Download one image layer and unpack it into `outputDir`.
//
// The blob {registry}/v2/{repository}/blobs/{digest} is streamed through a
// pipe into a spawned `tar -xzp --no-same-owner -f - -C <outputDir>` process
// while its digest is computed on the fly. After tar exits successfully the
// computed hash is compared with the one named in `layer.d_digest`.
//
// Throws:
//   OciRegistryException        - null outputDir, unsupported layer media
//                                 type, or a layer digest that is not
//                                 "sha256:..."
//   OciTarExtractionException   - tar could not be spawned or exited with a
//                                 non-success status
//   OciRegistryApiCallException - download failure or content digest mismatch
void OciClient::streamAndExtractLayer(TemporaryDirectory *outputDir,
                                      const std::string &registryUri,
                                      const std::string &repository,
                                      const OciManifestLayer &layer)
{
    if (!outputDir) {
        BUILDBOXCOMMON_THROW_EXCEPTION(
            OciRegistryException, "Output directory pointer cannot be null");
    }

    if (layer.d_mediaType != DOCKER_LAYER_MEDIA_TYPE &&
        layer.d_mediaType != OCI_LAYER_MEDIA_TYPE) {
        BUILDBOXCOMMON_THROW_EXCEPTION(
            OciRegistryException,
            "Layer media type not supported for extraction: "
                << layer.d_mediaType);
    }

    // Extract SHA256 hash from layer digest (remove "sha256:" prefix).
    // Done up front so a malformed digest fails before a process is spawned
    // and the whole layer is downloaded.
    std::string expectedHash = layer.d_digest;
    constexpr auto SHA256_PREFIX = "sha256:";
    if (expectedHash.rfind(SHA256_PREFIX, 0) == 0) {
        expectedHash = expectedHash.substr(strlen(SHA256_PREFIX));
    }
    else {
        BUILDBOXCOMMON_THROW_EXCEPTION(
            OciRegistryException,
            "Layer digest must be SHA256 format: " << layer.d_digest);
    }

    BUILDBOX_LOG_DEBUG("Streaming and extracting layer "
                       << layer.d_digest << " to directory "
                       << outputDir->strname());

    // Create path parts for blob download:
    // {registry}/v2/{repository}/blobs/{digest}
    const std::vector<std::string> pathParts = {registryUri, "v2", repository,
                                                "blobs", layer.d_digest};

    // Resolve everything that can throw before the pipe exists, so no file
    // descriptors are leaked if tar cannot be located.
    const std::string tarCommand = getTarCommand();
    const std::string outputDirName = outputDir->name();

    // Create pipe for streaming data directly to extraction:
    // pipeFd[0] becomes tar's stdin, pipeFd[1] is written by this process.
    std::array<int, 2> pipeFd = SystemUtils::createPipe();
    pid_t pid = -1;

    const auto closeBothPipeEnds = [&pipeFd]() {
        close(pipeFd[0]);
        close(pipeFd[1]);
    };

    // posix_spawn takes a `char *const argv[]`; the strings are not modified.
    constexpr int TAR_ARGS_COUNT = 8;
    //  NOLINTBEGIN (cppcoreguidelines-pro-type-const-cast)
    std::array<char *, TAR_ARGS_COUNT> args = {
        const_cast<char *>(tarCommand.c_str()),
        const_cast<char *>("-xzp"),
        const_cast<char *>("--no-same-owner"),
        const_cast<char *>("-f"),
        const_cast<char *>("-"),
        const_cast<char *>("-C"),
        const_cast<char *>(outputDirName.c_str()),
        nullptr};
    //  NOLINTEND (cppcoreguidelines-pro-type-const-cast)

    // Use posix_spawn instead of fork+exec for safer process creation
    posix_spawn_file_actions_t file_actions;
    if (posix_spawn_file_actions_init(&file_actions) != 0) {
        closeBothPipeEnds();
        BUILDBOXCOMMON_THROW_EXCEPTION(OciTarExtractionException,
                                       "posix_spawn_file_actions_init failed");
    }

    // In the child: read end of the pipe becomes stdin
    if (posix_spawn_file_actions_adddup2(&file_actions, pipeFd[0],
                                         STDIN_FILENO) != 0) {
        posix_spawn_file_actions_destroy(&file_actions);
        closeBothPipeEnds();
        BUILDBOXCOMMON_THROW_EXCEPTION(
            OciTarExtractionException,
            "posix_spawn_file_actions_adddup2 failed");
    }

    // In the child: drop both original pipe descriptors. Closing the write
    // end matters: if tar kept it open it would never see EOF on stdin.
    if (posix_spawn_file_actions_addclose(&file_actions, pipeFd[0]) != 0 ||
        posix_spawn_file_actions_addclose(&file_actions, pipeFd[1]) != 0) {
        posix_spawn_file_actions_destroy(&file_actions);
        closeBothPipeEnds();
        BUILDBOXCOMMON_THROW_EXCEPTION(
            OciTarExtractionException,
            "posix_spawn_file_actions_addclose failed");
    }

    const int spawnStatus = posix_spawn(&pid, tarCommand.c_str(),
                                        &file_actions, nullptr, args.data(),
                                        environ);

    posix_spawn_file_actions_destroy(&file_actions);

    if (spawnStatus != 0) {
        closeBothPipeEnds();
        BUILDBOXCOMMON_THROW_EXCEPTION(
            OciTarExtractionException,
            "Failed to spawn tar extraction process: "
                << strerror(spawnStatus));
    }

    // Parent process - close read end of pipe
    close(pipeFd[0]);
    pipeFd[0] = -1;

    // Initialize digest context for content verification
    DigestContext digestContext = DigestGenerator::createDigestContext();
    size_t totalBytes = 0;

    // Callback invoked for every downloaded chunk: forward it to tar through
    // the pipe and feed it to the running digest. Returning false asks the
    // HTTP client to abort the transfer.
    auto streamCallback = [&](const char *data, size_t size) -> bool {
        try {
            size_t totalWritten = 0;

            // write() may accept fewer bytes than requested; loop until the
            // whole chunk has been handed over.
            while (totalWritten < size) {
                const ssize_t written = write(pipeFd[1], data + totalWritten,
                                              size - totalWritten);

                if (written < 0) {
                    if (errno == EINTR) {
                        // Interrupted by a signal before any data was
                        // written: not an error, just try again.
                        continue;
                    }
                    if (errno == EPIPE) {
                        BUILDBOX_LOG_ERROR("Pipe closed by reader (tar "
                                           "process may have failed)");
                    }
                    else {
                        BUILDBOX_LOG_ERROR(
                            "Failed to write to pipe: " << strerror(errno));
                    }
                    return false;
                }

                if (written == 0) {
                    BUILDBOX_LOG_ERROR("Pipe write returned 0 (unexpected)");
                    return false;
                }

                totalWritten += static_cast<size_t>(written);
            }

            // Update digest calculation
            digestContext.update(data, size);
            totalBytes += size;

            return true; // Continue streaming
        }
        catch (const std::exception &e) {
            BUILDBOX_LOG_ERROR("Exception during pipe write: " << e.what());
            return false; // Abort streaming
        }
    };

    HTTPResponse response;
    try {
        // Stream the layer content
        response = streamRegistryApiCall(pathParts, streamCallback);
    }
    catch (...) {
        // Close the remaining pipe end and signal tar before propagating
        cleanupTarExtraction(pipeFd, pid);
        throw;
    }

    // Close write end of pipe to signal EOF to tar
    close(pipeFd[1]);
    pipeFd[1] = -1;

    // Wait for tar process to complete
    const int status = SystemUtils::waitPid(pid);
    pid = -1;

    if (status != EXIT_SUCCESS) {
        BUILDBOXCOMMON_THROW_EXCEPTION(
            OciTarExtractionException,
            "Tar extraction failed with exit code: " << status);
    }

    // Finalize digest calculation and verify
    const Digest calculatedDigest = digestContext.finalizeDigest();

    if (calculatedDigest.hash() != expectedHash) {
        BUILDBOXCOMMON_THROW_EXCEPTION(OciRegistryApiCallException,
                                       "Content digest mismatch for layer "
                                           << layer.d_digest << ": expected "
                                           << expectedHash << ", calculated "
                                           << toString(calculatedDigest));
    }

    BUILDBOX_LOG_INFO("Fetched layer " << StringUtils::join(pathParts, "/")
                                       << " (" << totalBytes << " bytes) to "
                                       << outputDir->strname());
}

// Best-effort teardown used when streaming a layer fails part-way.
//
// Closes whichever pipe ends are still open (marking them -1 in `pipeFd`) and,
// if `pid` refers to a spawned child, sends it SIGTERM without waiting.
//
// NOTE(review): the child is signalled but not waited for here; confirm
// whether something else reaps it.
void OciClient::cleanupTarExtraction(std::array<int, 2> &pipeFd,
                                     pid_t pid) const
{
    // Read end first, then write end
    for (int &fd : pipeFd) {
        if (fd == -1) {
            continue;
        }
        close(fd);
        fd = -1;
    }

    if (pid <= 0) {
        // No child process to clean up
        return;
    }

    // Don't block on the child process
    kill(pid, SIGTERM);
    BUILDBOX_LOG_WARNING("Exception occurred, sent SIGTERM to tar process "
                         << pid);
}

// Returns the tar executable to spawn: the explicitly configured path when
// one was given to the constructor, otherwise whatever "tar" resolves to via
// SystemUtils::getPathToCommand.
//
// Throws OciTarExtractionException when no path was configured and the lookup
// yields an empty string.
std::string OciClient::getTarCommand() const
{
    if (d_tarBinaryPath) {
        return *d_tarBinaryPath;
    }

    std::string resolvedTar = SystemUtils::getPathToCommand("tar");
    BUILDBOX_LOG_DEBUG("Using tar binary resolved from PATH: " << resolvedTar);

    if (!resolvedTar.empty()) {
        return resolvedTar;
    }

    BUILDBOXCOMMON_THROW_EXCEPTION(
        OciTarExtractionException,
        "Failed to find tar binary in PATH and no explicit path provided");
}

// Reads the bearer token from the configured auth token file and returns it
// with surrounding whitespace trimmed.
//
// The file is read on every call.
//
// Throws OciRegistryException when no token path is configured, when the file
// cannot be read, or when it is empty after trimming. Note that the "empty"
// error is raised inside the try block and is therefore re-wrapped by the
// handler below, like any other failure.
std::string OciClient::readAuthToken() const
{
    if (!d_authTokenPath) {
        BUILDBOXCOMMON_THROW_EXCEPTION(OciRegistryException,
                                       "Auth token path not configured");
    }

    try {
        std::string contents =
            FileUtils::getFileContents(d_authTokenPath.value().c_str());
        StringUtils::trim(&contents);

        if (!contents.empty()) {
            return contents;
        }

        BUILDBOXCOMMON_THROW_EXCEPTION(
            OciRegistryException,
            "Auth token file is empty: " << d_authTokenPath.value());
    }
    catch (const std::exception &readError) {
        BUILDBOXCOMMON_THROW_EXCEPTION(OciRegistryException,
                                       "Failed to read auth token from file '"
                                           << d_authTokenPath.value()
                                           << "': " << readError.what());
    }
}
std::pair<Digest, Digest>
OciClient::captureLayerToDigest(const std::string &layerDir,
                                const OciManifestLayer &layer)
{

    BUILDBOX_LOG_DEBUG("Capturing layer directory "
                       << layerDir << " for layer " << layer.d_digest);

    // Use CAS client to capture directory with both tree and root digests
    std::vector<std::string> paths = {layerDir};
    std::vector<std::string> properties = {"unix_mode"};
    constexpr int UNIX_MODE_MASK = 0; // We don't need to set any
                                      // specific UNIX permissions
    try {
        auto response = d_casClient->captureTree(
            paths, properties,
            false, // bypass_local_cache
            UNIX_MODE_MASK,
            nullptr, // requestStats
            Command_OutputDirectoryFormat_TREE_AND_DIRECTORY,
            true); // allowChmodToRead

        if (response.responses_size() == 0) {
            BUILDBOXCOMMON_THROW_EXCEPTION(
                OciRegistryException,
                "CAS captureTree returned no responses for layer "
                    << layer.d_digest);
        }

        const auto &captureResponse = response.responses(0);
        if (captureResponse.status().code() != grpc::StatusCode::OK) {
            BUILDBOXCOMMON_THROW_EXCEPTION(
                OciRegistryException,
                "CAS captureTree failed for layer "
                    << layer.d_digest << ": "
                    << captureResponse.status().message());
        }

        Digest treeDigest = captureResponse.tree_digest();
        Digest rootDigest = captureResponse.root_directory_digest();
        BUILDBOX_LOG_INFO("Successfully captured layer "
                          << layer.d_digest << " to CAS tree digest "
                          << toString(treeDigest) << " and root digest "
                          << toString(rootDigest));

        return std::make_pair(treeDigest, rootDigest);
    }
    catch (const std::exception &e) {
        BUILDBOXCOMMON_THROW_EXCEPTION(
            OciRegistryException,
            "Failed to capture layer " << layer.d_digest << ": " << e.what());
    }
}

// Produces the CAS tree digest for one image layer.
//
// 1. Ask the asset service for a previously cached result; it is used only
//    if CAS reports the referenced blob as present.
// 2. Otherwise download and extract the layer into a temporary directory and
//    capture that directory into CAS.
// 3. Record the freshly captured tree in the asset service.
//
// Throws OciRegistryException when download, extraction or capture fails.
Digest OciClient::processLayer(const OciManifest &manifest,
                               const OciManifestLayer &layer)
{
    // 1. Try asset service first
    std::string layerUri = generateLayerUri(manifest, layer);
    auto qualifiers = generateLayerQualifiers(layer);

    auto cached = d_assetClient->tryFetchBlob({layerUri}, qualifiers);

    // The CAS lookup is only issued when the asset service returned a result
    const bool usableCacheHit =
        cached.has_value() &&
        d_casClient->findMissingBlobs({cached->digest}).empty();

    if (usableCacheHit) {
        // Cache hit and blob exists in CAS
        BUILDBOX_LOG_INFO("Asset service cache hit for layer "
                          << layerUri << " with digest "
                          << toString(cached->digest));
        return cached->digest;
    }

    BUILDBOX_LOG_WARNING("Asset service cache miss for layer " << layerUri);

    // 2. Fallback to original processing
    std::pair<Digest, Digest> captured; // {tree digest, root digest}
    try {
        // Scratch directory for the extracted layer; it goes out of scope at
        // the end of this try block.
        TemporaryDirectory extractionDir;

        // Stream and extract layer
        streamAndExtractLayer(&extractionDir, manifest.d_registryUri,
                              manifest.d_repository, layer);

        // Capture extracted directory to CAS
        captured = captureLayerToDigest(extractionDir.strname(), layer);
    }
    catch (const std::exception &e) {
        BUILDBOXCOMMON_THROW_EXCEPTION(
            OciRegistryException,
            "Failed to process layer " << layer.d_digest << ": " << e.what());
    }

    // 3. Cache the treeDigest with root digest as referenced directory
    cacheLayerTree(manifest, layer, captured.first, captured.second);

    return captured.first;
}

// Merge the directory trees of several image layers into one tree and store
// the result in CAS.
//
// Each entry of `layerTreeDigests` is resolved with
// CASClient::getTreeFromTreeMessage() and the resulting trees are handed to
// MergeUtil::createMergedLayersDigest() in the order given. Directory blobs
// produced by the merge are uploaded, the merged root is read back with
// CASClient::getTree(), and a Tree message (root + children) is serialized,
// hashed and uploaded.
//
// Returns {digest of the serialized Tree message, digest of the merged root
// directory}.
//
// Throws OciRegistryException when `layerTreeDigests` is empty, when the
// merge reports failure, when the merged root resolves to no directories, or
// when a blob upload fails (see uploadBlobMap()).
std::pair<Digest, Digest>
OciClient::mergeLayerTrees(const std::vector<Digest> &layerTreeDigests)
{
    if (layerTreeDigests.empty()) {
        BUILDBOXCOMMON_THROW_EXCEPTION(
            OciRegistryException,
            "Cannot merge empty list of layer tree digests");
    }

    BUILDBOX_LOG_DEBUG("Merging " << layerTreeDigests.size()
                                  << " layer trees");

    // Convert each tree digest to DirectoryTree
    std::vector<MergeUtil::DirectoryTree> directoryTrees;
    directoryTrees.reserve(layerTreeDigests.size());

    for (const auto &treeDigest : layerTreeDigests) {
        BUILDBOX_LOG_DEBUG("Converting tree digest " << treeDigest.hash()
                                                     << " to DirectoryTree");
        directoryTrees.push_back(
            d_casClient->getTreeFromTreeMessage(treeDigest));
    }

    // Merge layers using MergeUtil
    Digest mergedRootDigest;
    digest_string_map newDirectoryBlobs;

    const bool mergeSuccess = MergeUtil::createMergedLayersDigest(
        directoryTrees, &mergedRootDigest, &newDirectoryBlobs);

    if (!mergeSuccess) {
        BUILDBOXCOMMON_THROW_EXCEPTION(
            OciRegistryException,
            "Failed to merge layer trees - all trees were empty");
    }

    BUILDBOX_LOG_DEBUG("Successfully merged layers, got "
                       << newDirectoryBlobs.size() << " new directory blobs");

    // The new directory blobs must be in CAS before getTree() can walk the
    // merged root.
    if (!newDirectoryBlobs.empty()) {
        uploadBlobMap(newDirectoryBlobs);
    }

    // Read the merged hierarchy back so it can be packed into a Tree message.
    const auto mergedTree = d_casClient->getTree(mergedRootDigest);

    // A Tree message without a root is not a usable image tree. Fail here
    // instead of uploading (and letting the caller cache) the digest of a
    // rootless Tree, which the image cache-hit path would later reject.
    if (mergedTree.empty()) {
        BUILDBOXCOMMON_THROW_EXCEPTION(
            OciRegistryException,
            "Empty directory tree for merged root digest "
                << mergedRootDigest.hash());
    }

    // First directory is the root, the remaining ones are its descendants.
    Tree treeMessage;
    treeMessage.mutable_root()->CopyFrom(mergedTree[0]);
    for (size_t i = 1; i < mergedTree.size(); ++i) {
        treeMessage.add_children()->CopyFrom(mergedTree[i]);
    }

    auto treeBlob = treeMessage.SerializeAsString();
    Digest mergedTreeDigest = DigestGenerator::hash(treeBlob);

    // Upload the tree blob to CAS as well
    digest_string_map treeBlobMap;
    treeBlobMap.emplace(mergedTreeDigest, std::move(treeBlob));
    uploadBlobMap(treeBlobMap);

    BUILDBOX_LOG_INFO("Successfully merged "
                      << layerTreeDigests.size()
                      << " layer trees into tree digest "
                      << toString(mergedTreeDigest) << " and root digest "
                      << toString(mergedRootDigest));

    return std::make_pair(mergedTreeDigest, mergedRootDigest);
}

// Convenience wrapper around getImageDigests(): resolves `ociUri` and
// returns only the first element of the resulting pair (the tree digest).
Digest OciClient::getImageTreeDigest(const std::string &ociUri)
{
    return getImageDigests(ociUri).first;
}

// Convenience wrapper around getImageDigests(): resolves `ociUri` and
// returns only the second element of the resulting pair (the root digest).
Digest OciClient::getImageRootDigest(const std::string &ociUri)
{
    return getImageDigests(ociUri).second;
}

std::pair<Digest, Digest> OciClient::getImageDigests(const std::string &ociUri)
{
    return getImageDigests(ociUri, nullptr);
}

// Resolve an OCI image URI to the pair {tree digest, root digest}.
//
// Steps:
//   1. Parse `ociUri` and fetch its manifest.
//   2. Look the whole image up in the asset service under a canonical,
//      digest-based URI. A hit is only honoured when the returned blob is
//      also present in CAS.
//   3. On a miss, process every manifest layer via processLayer(), merge the
//      resulting layer trees with mergeLayerTrees(), and push the merged
//      result to the asset service via cacheImageTreeDigest().
//
// `threadPool` is handed to FutureGroup and may be null; the log message
// below describes the null case as sequential processing.
// NOTE(review): the exact scheduling behaviour for a null pool is defined by
// FutureGroup, which is not visible here - confirm.
//
// Throws OciInvalidUriException when the URI cannot be parsed,
// OciRegistryApiCallException when the manifest reference does not start
// with "sha256:", and OciRegistryException when a cached tree is empty or
// the manifest lists no layers. Exceptions raised by the helpers called here
// (manifest fetch, layer processing, merge) are not caught in this function.
std::pair<Digest, Digest> OciClient::getImageDigests(const std::string &ociUri,
                                                     ThreadPool *threadPool)
{
    BUILDBOX_LOG_DEBUG("Getting image digests for OCI URI: " << ociUri);

    // Any parse failure is re-reported as OciInvalidUriException.
    OciUriComponents components;
    try {
        components = parseOCIuri(ociUri);
    }
    catch (const std::exception &e) {
        BUILDBOXCOMMON_THROW_EXCEPTION(
            OciInvalidUriException, "Failed to parse OCI URI: " << e.what());
    }
    OciManifest manifest = getOCIManifest(components);

    // Extract manifest digest from manifest reference (format: "sha256:hash")
    // rfind(prefix, 0) == 0 is a "starts with" test.
    if (manifest.d_reference.rfind("sha256:", 0) != 0) {
        BUILDBOXCOMMON_THROW_EXCEPTION(
            OciRegistryApiCallException,
            "Invalid manifest reference format: " << manifest.d_reference);
    }
    // Drop the prefix; only the part after it is used for the canonical URI.
    std::string manifestDigest =
        manifest.d_reference.substr(SHA256_PREFIX_LEN);

    // Try full image cache first
    std::string imageUri =
        generateCanonicalImageUri(components, manifestDigest);
    auto qualifiers = generateImageQualifiers(manifest);
    auto fetchResult = d_assetClient->tryFetchBlob({imageUri}, qualifiers);

    // Accept the asset-service answer only if CAS reports the blob as not
    // missing; otherwise fall through and rebuild the image from its layers.
    if (fetchResult.has_value() &&
        d_casClient->findMissingBlobs({fetchResult->digest}).empty()) {
        BUILDBOX_LOG_INFO("Asset service image cache hit for "
                          << imageUri << " with digest "
                          << toString(fetchResult->digest));
        // Convert cached tree digest to root digest
        auto directoryTree =
            d_casClient->getTreeFromTreeMessage(fetchResult->digest);
        if (directoryTree.empty()) {
            BUILDBOXCOMMON_THROW_EXCEPTION(
                OciRegistryException, "Empty directory tree for cached digest "
                                          << fetchResult->digest.hash());
        }
        // First directory in the tree is the root directory
        // The root digest is recomputed locally by hashing its serialization.
        const auto rootDirectoryBlob = directoryTree[0].SerializeAsString();
        auto cachedRootDigest = DigestGenerator::hash(rootDirectoryBlob);
        return {fetchResult->digest, cachedRootDigest};
    }
    else {
        BUILDBOX_LOG_WARNING("Asset service image cache miss for "
                             << imageUri);
    }

    // Cache miss: process layers individually using already-fetched manifest
    BUILDBOX_LOG_INFO("Processing image " << manifest.d_repository << " with "
                                          << manifest.d_layers.size()
                                          << " layers");

    std::vector<Digest> layerTreeDigests;
    layerTreeDigests.reserve(manifest.d_layers.size());

    BUILDBOX_LOG_INFO("Processing "
                      << manifest.d_layers.size() << " layers"
                      << (threadPool ? " in parallel" : " sequentially"));

    // Inner scope: futureGroup and futures are destroyed before merging.
    {
        FutureGroup<Digest> futureGroup(threadPool);
        std::vector<std::shared_future<Digest>> futures;
        futures.reserve(manifest.d_layers.size());

        for (const auto &layer : manifest.d_layers) {
            // `manifest` and `layer` are captured by reference; both refer
            // to objects declared outside this inner scope.
            auto layerLambda = [this, &manifest, &layer]() {
                BUILDBOX_LOG_DEBUG("Processing layer " << layer.d_digest);
                auto layerTreeDigest = processLayer(manifest, layer);
                BUILDBOX_LOG_INFO("Processed layer "
                                  << layer.d_digest << " with tree digest "
                                  << toString(layerTreeDigest));
                return layerTreeDigest;
            };
            futures.emplace_back(futureGroup.add(layerLambda));
        }

        // Collect results in submission order, i.e. manifest layer order,
        // which is the order mergeLayerTrees() receives them in.
        for (auto &future : futures) {
            layerTreeDigests.push_back(future.get());
        }
    }

    // A manifest without layers yields nothing to merge.
    if (layerTreeDigests.empty()) {
        BUILDBOXCOMMON_THROW_EXCEPTION(
            OciRegistryException,
            "Image " << ociUri << " has no layers to process");
    }

    auto [finalTreeDigest, finalRootDigest] =
        mergeLayerTrees(layerTreeDigests);

    // Caching is best-effort: cacheImageTreeDigest() logs std::exception
    // failures instead of propagating them.
    cacheImageTreeDigest(imageUri, manifest, finalTreeDigest, finalRootDigest);

    BUILDBOX_LOG_INFO("Successfully fetched image digests "
                      << "tree=" << toString(finalTreeDigest)
                      << " root=" << toString(finalRootDigest)
                      << " for container-image=" << ociUri);

    return {finalTreeDigest, finalRootDigest};
}

void OciClient::uploadBlobMap(const digest_string_map &blobMap) const
{
    // Find which blobs are missing from CAS
    std::vector<Digest> newDigests;
    newDigests.reserve(blobMap.size());
    for (const auto &[digest, blob] : blobMap) {
        newDigests.push_back(digest);
    }

    const auto missingDigests = d_casClient->findMissingBlobs(newDigests);

    // Create upload requests for missing blobs
    std::vector<CASClient::UploadRequest> uploadRequests;
    uploadRequests.reserve(missingDigests.size());
    for (const auto &missingDigest : missingDigests) {
        const auto &blob = blobMap.at(missingDigest);
        uploadRequests.emplace_back(missingDigest, blob);
    }

    BUILDBOX_LOG_DEBUG("Uploading " << uploadRequests.size()
                                    << " missing blobs to CAS");

    // Upload the missing blobs
    if (!uploadRequests.empty()) {
        const auto uploadResults = d_casClient->uploadBlobs(uploadRequests);

        // Check for upload failures
        for (const auto &result : uploadResults) {
            if (!result.status.ok()) {
                BUILDBOXCOMMON_THROW_EXCEPTION(
                    OciRegistryException,
                    "Failed to upload blob " << result.digest.hash() << ": "
                                             << result.status.error_message());
            }
        }
    }
}

// Build the URI string identifying a layer blob:
//   <registryUri>/v2/<repository>/blobs/<layer digest>
std::string OciClient::generateLayerUri(const OciManifest &manifest,
                                        const OciManifestLayer &layer) const
{
    std::string layerUri = manifest.d_registryUri;
    layerUri += "/v2/";
    layerUri += manifest.d_repository;
    layerUri += "/blobs/";
    layerUri += layer.d_digest;
    return layerUri;
}

// Qualifier list attached to per-layer asset entries: the buildbox OCI
// version followed by the layer's media type as the resource type.
std::vector<std::pair<std::string, std::string>>
OciClient::generateLayerQualifiers(const OciManifestLayer &layer) const
{
    std::vector<std::pair<std::string, std::string>> layerQualifiers;
    layerQualifiers.reserve(2);

    layerQualifiers.emplace_back(BUILDBOX_OCI_VERSION_QUALIFIER,
                                 BUILDBOX_OCI_VERSION);
    layerQualifiers.emplace_back(RESOURCE_TYPE_QUALIFIER, layer.d_mediaType);

    return layerQualifiers;
}

// Best-effort registration of a processed layer with the asset service.
//
// Pushes `treeDigest` under the layer's URI and qualifiers, listing
// `rootDigest` as a referenced directory. Any std::exception raised on the
// way is logged as a warning and not propagated.
void OciClient::cacheLayerTree(const OciManifest &manifest,
                               const OciManifestLayer &layer,
                               const Digest &treeDigest,
                               const Digest &rootDigest)
{
    try {
        auto layerQualifiers = generateLayerQualifiers(layer);
        std::string assetUri = generateLayerUri(manifest, layer);

        d_assetClient->pushBlob({assetUri}, layerQualifiers, treeDigest,
                                {} /* referencedBlobs */,
                                {rootDigest} /* referencedDirectories */);

        BUILDBOX_LOG_DEBUG(
            "Cached layer tree "
            << layer.d_digest << " with tree digest " << treeDigest.hash()
            << " and referenced root directory " << rootDigest.hash());
    }
    catch (const std::exception &pushError) {
        // A caching failure must not fail layer processing.
        BUILDBOX_LOG_WARNING("Failed to cache layer tree "
                             << layer.d_digest << ": " << pushError.what());
    }
}

// Build the cache key for a whole image:
//   <registry>/<repository>@sha256:<manifestDigest>
// The digest form is used whatever form the caller's URI had.
std::string
OciClient::generateCanonicalImageUri(const OciUriComponents &components,
                                     const std::string &manifestDigest) const
{
    std::string canonicalUri = components.registry;
    canonicalUri += "/";
    canonicalUri += components.repository;
    canonicalUri += "@sha256:";
    canonicalUri += manifestDigest;
    return canonicalUri;
}

// Qualifier list attached to whole-image asset entries, in this order:
// buildbox OCI version, manifest media type as the resource type, default
// platform OS, default platform architecture.
std::vector<std::pair<std::string, std::string>>
OciClient::generateImageQualifiers(const OciManifest &manifest) const
{
    std::vector<std::pair<std::string, std::string>> imageQualifiers;
    imageQualifiers.reserve(4);

    imageQualifiers.emplace_back(BUILDBOX_OCI_VERSION_QUALIFIER,
                                 BUILDBOX_OCI_VERSION);
    imageQualifiers.emplace_back(RESOURCE_TYPE_QUALIFIER,
                                 manifest.d_mediaType);
    imageQualifiers.emplace_back(PLATFORM_OS_QUALIFIER, DEFAULT_PLATFORM_OS);
    imageQualifiers.emplace_back(PLATFORM_ARCH_QUALIFIER,
                                 DEFAULT_PLATFORM_ARCH);

    return imageQualifiers;
}

// Best-effort registration of a fully merged image with the asset service.
//
// Pushes `treeDigest` under `canonicalImageUri` with the image qualifiers,
// listing `rootDigest` as a referenced directory. Any std::exception raised
// on the way is logged as a warning and not propagated.
void OciClient::cacheImageTreeDigest(const std::string &canonicalImageUri,
                                     const OciManifest &manifest,
                                     const Digest &treeDigest,
                                     const Digest &rootDigest)
{
    try {
        auto imageQualifiers = generateImageQualifiers(manifest);

        d_assetClient->pushBlob({canonicalImageUri}, imageQualifiers,
                                treeDigest, {} /* referencedBlobs */,
                                {rootDigest} /* referencedDirectories */);

        BUILDBOX_LOG_INFO("Cached image tree digest for "
                          << canonicalImageUri
                          << " with referenced root directory "
                          << rootDigest.hash());
    }
    catch (const std::exception &pushError) {
        // A caching failure must not fail image resolution.
        BUILDBOX_LOG_WARNING(
            "Failed to cache image tree digest: " << pushError.what());
    }
}

} // namespace buildboxcommon
