1. Packages
  2. Gcore Provider
  3. API Docs
  4. InferenceDeployment
gcore 0.19.0 published on Monday, Apr 14, 2025 by g-core

gcore.InferenceDeployment

Explore with Pulumi AI

Represents an inference deployment.

Example Usage

Prerequisite
// Prerequisite: resolve the Gcore project and region by name; their IDs
// are referenced by the InferenceDeployment examples that follow.
import * as pulumi from "@pulumi/pulumi";
import * as gcore from "@pulumi/gcore";

const project = gcore.getProject({
    name: "Default",
});
const region = gcore.getRegion({
    name: "Luxembourg-2",
});
Copy
# Prerequisite: resolve the Gcore project and region by name; their IDs
# are referenced by the InferenceDeployment examples that follow.
import pulumi
import pulumi_gcore as gcore

project = gcore.get_project(name="Default")
region = gcore.get_region(name="Luxembourg-2")
Copy
// Prerequisite: resolve the Gcore project and region by name; their IDs
// are referenced by the InferenceDeployment examples that follow.
package main

import (
	"github.com/pulumi/pulumi-terraform-provider/sdks/go/gcore/gcore"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		// Look up the project named "Default".
		_, err := gcore.GetProject(ctx, &gcore.GetProjectArgs{
			Name: "Default",
		}, nil)
		if err != nil {
			return err
		}
		// Look up the region named "Luxembourg-2".
		_, err = gcore.GetRegion(ctx, &gcore.GetRegionArgs{
			Name: "Luxembourg-2",
		}, nil)
		if err != nil {
			return err
		}
		return nil
	})
}
Copy
// Prerequisite: resolve the Gcore project and region by name; their IDs
// are referenced by the InferenceDeployment examples that follow.
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcore = Pulumi.Gcore;

return await Deployment.RunAsync(() => 
{
    var project = Gcore.GetProject.Invoke(new()
    {
        Name = "Default",
    });

    var region = Gcore.GetRegion.Invoke(new()
    {
        Name = "Luxembourg-2",
    });

});
Copy
// Prerequisite: resolve the Gcore project and region by name; their IDs
// are referenced by the InferenceDeployment examples that follow.
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcore.GcoreFunctions;
import com.pulumi.gcore.inputs.GetProjectArgs;
import com.pulumi.gcore.inputs.GetRegionArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    public static void stack(Context ctx) {
        // Look up the project named "Default".
        final var project = GcoreFunctions.getProject(GetProjectArgs.builder()
            .name("Default")
            .build());

        // Look up the region named "Luxembourg-2".
        final var region = GcoreFunctions.getRegion(GetRegionArgs.builder()
            .name("Luxembourg-2")
            .build());

    }
}
Copy
# Prerequisite: resolve the Gcore project and region by name so later
# resources can reference ${project.id} and ${region.id}.
variables:
  project:
    fn::invoke:
      function: gcore:getProject
      arguments:
        name: Default
  region:
    fn::invoke:
      function: gcore:getRegion
      arguments:
        name: Luxembourg-2
Copy

Basic example

Creating inference deployment

// Basic example: an nginx deployment with two fixed replicas (scaleMin ==
// scaleMax) on port 80, an HTTP liveness probe, and readiness/startup
// probes disabled.
import * as pulumi from "@pulumi/pulumi";
import * as gcore from "@pulumi/gcore";

const inf = new gcore.InferenceDeployment("inf", {
    projectId: data.gcore_project.project.id,
    image: "nginx:latest",
    listeningPort: 80,
    flavorName: "inference-4vcpu-16gib",
    containers: [{
        regionId: data.gcore_region.region.id,
        scaleMin: 2,
        scaleMax: 2,
        triggersCpuThreshold: 80,
    }],
    livenessProbe: {
        enabled: true,
        failureThreshold: 3,
        initialDelaySeconds: 10,
        periodSeconds: 10,
        timeoutSeconds: 1,
        successThreshold: 1,
        httpGetPort: 80,
        httpGetHeaders: {
            "User-Agent": "my user agent",
        },
        httpGetHost: "localhost",
        httpGetPath: "/",
        httpGetSchema: "HTTPS",
    },
    readinessProbe: {
        enabled: false,
    },
    startupProbe: {
        enabled: false,
    },
});
Copy
# Basic example: an nginx deployment with two fixed replicas (scale_min ==
# scale_max) on port 80, an HTTP liveness probe, and readiness/startup
# probes disabled.
import pulumi
import pulumi_gcore as gcore

inf = gcore.InferenceDeployment("inf",
    project_id=data["gcore_project"]["project"]["id"],
    image="nginx:latest",
    listening_port=80,
    flavor_name="inference-4vcpu-16gib",
    containers=[{
        "region_id": data["gcore_region"]["region"]["id"],
        "scale_min": 2,
        "scale_max": 2,
        "triggers_cpu_threshold": 80,
    }],
    liveness_probe={
        "enabled": True,
        "failure_threshold": 3,
        "initial_delay_seconds": 10,
        "period_seconds": 10,
        "timeout_seconds": 1,
        "success_threshold": 1,
        "http_get_port": 80,
        "http_get_headers": {
            "User-Agent": "my user agent",
        },
        "http_get_host": "localhost",
        "http_get_path": "/",
        "http_get_schema": "HTTPS",
    },
    readiness_probe={
        "enabled": False,
    },
    startup_probe={
        "enabled": False,
    })
Copy
// Basic example: an nginx deployment with two fixed replicas (ScaleMin ==
// ScaleMax) on port 80, an HTTP liveness probe, and readiness/startup
// probes disabled.
package main

import (
	"github.com/pulumi/pulumi-terraform-provider/sdks/go/gcore/gcore"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		_, err := gcore.NewInferenceDeployment(ctx, "inf", &gcore.InferenceDeploymentArgs{
			ProjectId:     pulumi.Any(data.Gcore_project.Project.Id),
			Image:         pulumi.String("nginx:latest"),
			ListeningPort: pulumi.Float64(80),
			FlavorName:    pulumi.String("inference-4vcpu-16gib"),
			Containers: gcore.InferenceDeploymentContainerArray{
				&gcore.InferenceDeploymentContainerArgs{
					RegionId:             pulumi.Any(data.Gcore_region.Region.Id),
					ScaleMin:             pulumi.Float64(2),
					ScaleMax:             pulumi.Float64(2),
					TriggersCpuThreshold: pulumi.Float64(80),
				},
			},
			// HTTPS GET probe against localhost:80/ with a custom header.
			LivenessProbe: &gcore.InferenceDeploymentLivenessProbeArgs{
				Enabled:             pulumi.Bool(true),
				FailureThreshold:    pulumi.Float64(3),
				InitialDelaySeconds: pulumi.Float64(10),
				PeriodSeconds:       pulumi.Float64(10),
				TimeoutSeconds:      pulumi.Float64(1),
				SuccessThreshold:    pulumi.Float64(1),
				HttpGetPort:         pulumi.Float64(80),
				HttpGetHeaders: pulumi.StringMap{
					"User-Agent": pulumi.String("my user agent"),
				},
				HttpGetHost:   pulumi.String("localhost"),
				HttpGetPath:   pulumi.String("/"),
				HttpGetSchema: pulumi.String("HTTPS"),
			},
			ReadinessProbe: &gcore.InferenceDeploymentReadinessProbeArgs{
				Enabled: pulumi.Bool(false),
			},
			StartupProbe: &gcore.InferenceDeploymentStartupProbeArgs{
				Enabled: pulumi.Bool(false),
			},
		})
		if err != nil {
			return err
		}
		return nil
	})
}
Copy
// Basic example: an nginx deployment with two fixed replicas (ScaleMin ==
// ScaleMax) on port 80, an HTTP liveness probe, and readiness/startup
// probes disabled.
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcore = Pulumi.Gcore;

return await Deployment.RunAsync(() => 
{
    var inf = new Gcore.InferenceDeployment("inf", new()
    {
        ProjectId = data.Gcore_project.Project.Id,
        Image = "nginx:latest",
        ListeningPort = 80,
        FlavorName = "inference-4vcpu-16gib",
        Containers = new[]
        {
            new Gcore.Inputs.InferenceDeploymentContainerArgs
            {
                RegionId = data.Gcore_region.Region.Id,
                ScaleMin = 2,
                ScaleMax = 2,
                TriggersCpuThreshold = 80,
            },
        },
        // HTTPS GET probe against localhost:80/ with a custom header.
        LivenessProbe = new Gcore.Inputs.InferenceDeploymentLivenessProbeArgs
        {
            Enabled = true,
            FailureThreshold = 3,
            InitialDelaySeconds = 10,
            PeriodSeconds = 10,
            TimeoutSeconds = 1,
            SuccessThreshold = 1,
            HttpGetPort = 80,
            HttpGetHeaders = 
            {
                { "User-Agent", "my user agent" },
            },
            HttpGetHost = "localhost",
            HttpGetPath = "/",
            HttpGetSchema = "HTTPS",
        },
        ReadinessProbe = new Gcore.Inputs.InferenceDeploymentReadinessProbeArgs
        {
            Enabled = false,
        },
        StartupProbe = new Gcore.Inputs.InferenceDeploymentStartupProbeArgs
        {
            Enabled = false,
        },
    });

});
Copy
// Basic example: an nginx deployment with two fixed replicas (scaleMin ==
// scaleMax) on port 80, an HTTP liveness probe, and readiness/startup
// probes disabled.
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcore.InferenceDeployment;
import com.pulumi.gcore.InferenceDeploymentArgs;
import com.pulumi.gcore.inputs.InferenceDeploymentContainerArgs;
import com.pulumi.gcore.inputs.InferenceDeploymentLivenessProbeArgs;
import com.pulumi.gcore.inputs.InferenceDeploymentReadinessProbeArgs;
import com.pulumi.gcore.inputs.InferenceDeploymentStartupProbeArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    public static void stack(Context ctx) {
        var inf = new InferenceDeployment("inf", InferenceDeploymentArgs.builder()
            .projectId(data.gcore_project().project().id())
            .image("nginx:latest")
            .listeningPort(80)
            .flavorName("inference-4vcpu-16gib")
            .containers(InferenceDeploymentContainerArgs.builder()
                .regionId(data.gcore_region().region().id())
                .scaleMin(2)
                .scaleMax(2)
                .triggersCpuThreshold(80)
                .build())
            // HTTPS GET probe against localhost:80/ with a custom header.
            .livenessProbe(InferenceDeploymentLivenessProbeArgs.builder()
                .enabled(true)
                .failureThreshold(3)
                .initialDelaySeconds(10)
                .periodSeconds(10)
                .timeoutSeconds(1)
                .successThreshold(1)
                .httpGetPort(80)
                .httpGetHeaders(Map.of("User-Agent", "my user agent"))
                .httpGetHost("localhost")
                .httpGetPath("/")
                .httpGetSchema("HTTPS")
                .build())
            .readinessProbe(InferenceDeploymentReadinessProbeArgs.builder()
                .enabled(false)
                .build())
            .startupProbe(InferenceDeploymentStartupProbeArgs.builder()
                .enabled(false)
                .build())
            .build());

    }
}
Copy
# Basic example: an nginx deployment with two fixed replicas (scaleMin ==
# scaleMax) on port 80, an HTTP liveness probe, and readiness/startup
# probes disabled.
resources:
  'inf':
    type: gcore:InferenceDeployment
    properties:
      projectId: ${data.gcore_project.project.id}
      image: nginx:latest
      listeningPort: 80
      flavorName: inference-4vcpu-16gib
      containers:
        - regionId: ${data.gcore_region.region.id}
          scaleMin: 2
          scaleMax: 2
          triggersCpuThreshold: 80
      # HTTPS GET probe against localhost:80/ with a custom header.
      livenessProbe:
        enabled: true
        failureThreshold: 3
        initialDelaySeconds: 10
        periodSeconds: 10
        timeoutSeconds: 1
        successThreshold: 1
        httpGetPort: 80
        httpGetHeaders:
          User-Agent: my user agent
        httpGetHost: localhost
        httpGetPath: /
        httpGetSchema: HTTPS
      readinessProbe:
        enabled: false
      startupProbe:
        enabled: false
Copy

Creating inference deployment with sqs trigger

// SQS-trigger example: an AWS credential secret plus a deployment that
// scales 0..2 replicas based on SQS queue length; all probes disabled.
import * as pulumi from "@pulumi/pulumi";
import * as gcore from "@pulumi/gcore";

const aws = new gcore.InferenceSecret("aws", {
    projectId: data.gcore_project.project.id,
    dataAwsAccessKeyId: "my-aws-access-key-id",
    dataAwsSecretAccessKey: "my-aws-access-key",
});
const inf = new gcore.InferenceDeployment("inf", {
    projectId: data.gcore_project.project.id,
    image: "nginx:latest",
    listeningPort: 80,
    flavorName: "inference-4vcpu-16gib",
    timeout: 60,
    containers: [{
        regionId: data.gcore_region.region.id,
        cooldownPeriod: 60,
        pollingInterval: 60,
        scaleMin: 0,
        scaleMax: 2,
        triggersCpuThreshold: 80,
        triggersSqsSecretName: aws.name,
        triggersSqsAwsRegion: "us-west-2",
        triggersSqsQueueUrl: "https://sqs.us-west-2.amazonaws.com/1234567890/my-queue",
        triggersSqsQueueLength: 5,
        triggersSqsActivationQueueLength: 2,
    }],
    livenessProbe: {
        enabled: false,
    },
    readinessProbe: {
        enabled: false,
    },
    startupProbe: {
        enabled: false,
    },
});
Copy
# SQS-trigger example: an AWS credential secret plus a deployment that
# scales 0..2 replicas based on SQS queue length; all probes disabled.
import pulumi
import pulumi_gcore as gcore

aws = gcore.InferenceSecret("aws",
    project_id=data["gcore_project"]["project"]["id"],
    data_aws_access_key_id="my-aws-access-key-id",
    data_aws_secret_access_key="my-aws-access-key")
inf = gcore.InferenceDeployment("inf",
    project_id=data["gcore_project"]["project"]["id"],
    image="nginx:latest",
    listening_port=80,
    flavor_name="inference-4vcpu-16gib",
    timeout=60,
    containers=[{
        "region_id": data["gcore_region"]["region"]["id"],
        "cooldown_period": 60,
        "polling_interval": 60,
        "scale_min": 0,
        "scale_max": 2,
        "triggers_cpu_threshold": 80,
        "triggers_sqs_secret_name": aws.name,
        "triggers_sqs_aws_region": "us-west-2",
        "triggers_sqs_queue_url": "https://sqs.us-west-2.amazonaws.com/1234567890/my-queue",
        "triggers_sqs_queue_length": 5,
        "triggers_sqs_activation_queue_length": 2,
    }],
    liveness_probe={
        "enabled": False,
    },
    readiness_probe={
        "enabled": False,
    },
    startup_probe={
        "enabled": False,
    })
Copy
// SQS-trigger example: an AWS credential secret plus a deployment that
// scales 0..2 replicas based on SQS queue length; all probes disabled.
package main

import (
	"github.com/pulumi/pulumi-terraform-provider/sdks/go/gcore/gcore"
	"github.com/pulumi/pulumi/sdk/v3/go/pulumi"
)

func main() {
	pulumi.Run(func(ctx *pulumi.Context) error {
		// Secret holding the AWS credentials the SQS trigger uses.
		aws, err := gcore.NewInferenceSecret(ctx, "aws", &gcore.InferenceSecretArgs{
			ProjectId:              pulumi.Any(data.Gcore_project.Project.Id),
			DataAwsAccessKeyId:     pulumi.String("my-aws-access-key-id"),
			DataAwsSecretAccessKey: pulumi.String("my-aws-access-key"),
		})
		if err != nil {
			return err
		}
		_, err = gcore.NewInferenceDeployment(ctx, "inf", &gcore.InferenceDeploymentArgs{
			ProjectId:     pulumi.Any(data.Gcore_project.Project.Id),
			Image:         pulumi.String("nginx:latest"),
			ListeningPort: pulumi.Float64(80),
			FlavorName:    pulumi.String("inference-4vcpu-16gib"),
			Timeout:       pulumi.Float64(60),
			Containers: gcore.InferenceDeploymentContainerArray{
				&gcore.InferenceDeploymentContainerArgs{
					RegionId:                         pulumi.Any(data.Gcore_region.Region.Id),
					CooldownPeriod:                   pulumi.Float64(60),
					PollingInterval:                  pulumi.Float64(60),
					ScaleMin:                         pulumi.Float64(0),
					ScaleMax:                         pulumi.Float64(2),
					TriggersCpuThreshold:             pulumi.Float64(80),
					TriggersSqsSecretName:            aws.Name,
					TriggersSqsAwsRegion:             pulumi.String("us-west-2"),
					TriggersSqsQueueUrl:              pulumi.String("https://sqs.us-west-2.amazonaws.com/1234567890/my-queue"),
					TriggersSqsQueueLength:           pulumi.Float64(5),
					TriggersSqsActivationQueueLength: pulumi.Float64(2),
				},
			},
			LivenessProbe: &gcore.InferenceDeploymentLivenessProbeArgs{
				Enabled: pulumi.Bool(false),
			},
			ReadinessProbe: &gcore.InferenceDeploymentReadinessProbeArgs{
				Enabled: pulumi.Bool(false),
			},
			StartupProbe: &gcore.InferenceDeploymentStartupProbeArgs{
				Enabled: pulumi.Bool(false),
			},
		})
		if err != nil {
			return err
		}
		return nil
	})
}
Copy
// SQS-trigger example: an AWS credential secret plus a deployment that
// scales 0..2 replicas based on SQS queue length; all probes disabled.
using System.Collections.Generic;
using System.Linq;
using Pulumi;
using Gcore = Pulumi.Gcore;

return await Deployment.RunAsync(() => 
{
    // Secret holding the AWS credentials the SQS trigger uses.
    var aws = new Gcore.InferenceSecret("aws", new()
    {
        ProjectId = data.Gcore_project.Project.Id,
        DataAwsAccessKeyId = "my-aws-access-key-id",
        DataAwsSecretAccessKey = "my-aws-access-key",
    });

    var inf = new Gcore.InferenceDeployment("inf", new()
    {
        ProjectId = data.Gcore_project.Project.Id,
        Image = "nginx:latest",
        ListeningPort = 80,
        FlavorName = "inference-4vcpu-16gib",
        Timeout = 60,
        Containers = new[]
        {
            new Gcore.Inputs.InferenceDeploymentContainerArgs
            {
                RegionId = data.Gcore_region.Region.Id,
                CooldownPeriod = 60,
                PollingInterval = 60,
                ScaleMin = 0,
                ScaleMax = 2,
                TriggersCpuThreshold = 80,
                TriggersSqsSecretName = aws.Name,
                TriggersSqsAwsRegion = "us-west-2",
                TriggersSqsQueueUrl = "https://sqs.us-west-2.amazonaws.com/1234567890/my-queue",
                TriggersSqsQueueLength = 5,
                TriggersSqsActivationQueueLength = 2,
            },
        },
        LivenessProbe = new Gcore.Inputs.InferenceDeploymentLivenessProbeArgs
        {
            Enabled = false,
        },
        ReadinessProbe = new Gcore.Inputs.InferenceDeploymentReadinessProbeArgs
        {
            Enabled = false,
        },
        StartupProbe = new Gcore.Inputs.InferenceDeploymentStartupProbeArgs
        {
            Enabled = false,
        },
    });

});
Copy
// SQS-trigger example: an AWS credential secret plus a deployment that
// scales 0..2 replicas based on SQS queue length; all probes disabled.
package generated_program;

import com.pulumi.Context;
import com.pulumi.Pulumi;
import com.pulumi.core.Output;
import com.pulumi.gcore.InferenceSecret;
import com.pulumi.gcore.InferenceSecretArgs;
import com.pulumi.gcore.InferenceDeployment;
import com.pulumi.gcore.InferenceDeploymentArgs;
import com.pulumi.gcore.inputs.InferenceDeploymentContainerArgs;
import com.pulumi.gcore.inputs.InferenceDeploymentLivenessProbeArgs;
import com.pulumi.gcore.inputs.InferenceDeploymentReadinessProbeArgs;
import com.pulumi.gcore.inputs.InferenceDeploymentStartupProbeArgs;
import java.util.List;
import java.util.ArrayList;
import java.util.Map;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;

public class App {
    public static void main(String[] args) {
        Pulumi.run(App::stack);
    }

    public static void stack(Context ctx) {
        // Secret holding the AWS credentials the SQS trigger uses.
        var aws = new InferenceSecret("aws", InferenceSecretArgs.builder()
            .projectId(data.gcore_project().project().id())
            .dataAwsAccessKeyId("my-aws-access-key-id")
            .dataAwsSecretAccessKey("my-aws-access-key")
            .build());

        var inf = new InferenceDeployment("inf", InferenceDeploymentArgs.builder()
            .projectId(data.gcore_project().project().id())
            .image("nginx:latest")
            .listeningPort(80)
            .flavorName("inference-4vcpu-16gib")
            .timeout(60)
            .containers(InferenceDeploymentContainerArgs.builder()
                .regionId(data.gcore_region().region().id())
                .cooldownPeriod(60)
                .pollingInterval(60)
                .scaleMin(0)
                .scaleMax(2)
                .triggersCpuThreshold(80)
                .triggersSqsSecretName(aws.name())
                .triggersSqsAwsRegion("us-west-2")
                .triggersSqsQueueUrl("https://sqs.us-west-2.amazonaws.com/1234567890/my-queue")
                .triggersSqsQueueLength(5)
                .triggersSqsActivationQueueLength(2)
                .build())
            .livenessProbe(InferenceDeploymentLivenessProbeArgs.builder()
                .enabled(false)
                .build())
            .readinessProbe(InferenceDeploymentReadinessProbeArgs.builder()
                .enabled(false)
                .build())
            .startupProbe(InferenceDeploymentStartupProbeArgs.builder()
                .enabled(false)
                .build())
            .build());

    }
}
Copy
# SQS-trigger example: an AWS credential secret plus a deployment that
# scales 0..2 replicas based on SQS queue length; all probes disabled.
resources:
  # Secret holding the AWS credentials the SQS trigger uses.
  aws:
    type: gcore:InferenceSecret
    properties:
      projectId: ${data.gcore_project.project.id}
      dataAwsAccessKeyId: my-aws-access-key-id
      dataAwsSecretAccessKey: my-aws-access-key
  'inf':
    type: gcore:InferenceDeployment
    properties:
      projectId: ${data.gcore_project.project.id}
      image: nginx:latest
      listeningPort: 80
      flavorName: inference-4vcpu-16gib
      timeout: 60
      containers:
        - regionId: ${data.gcore_region.region.id}
          cooldownPeriod: 60
          pollingInterval: 60
          scaleMin: 0
          scaleMax: 2
          triggersCpuThreshold: 80
          triggersSqsSecretName: ${aws.name}
          triggersSqsAwsRegion: us-west-2
          triggersSqsQueueUrl: https://sqs.us-west-2.amazonaws.com/1234567890/my-queue
          triggersSqsQueueLength: 5
          triggersSqsActivationQueueLength: 2
      livenessProbe:
        enabled: false
      readinessProbe:
        enabled: false
      startupProbe:
        enabled: false
Copy

Create InferenceDeployment Resource

Resources are created with functions called constructors. To learn more about declaring and configuring resources, see Resources.

Constructor syntax

new InferenceDeployment(name: string, args: InferenceDeploymentArgs, opts?: CustomResourceOptions);
@overload
def InferenceDeployment(resource_name: str,
                        args: InferenceDeploymentArgs,
                        opts: Optional[ResourceOptions] = None)

@overload
def InferenceDeployment(resource_name: str,
                        opts: Optional[ResourceOptions] = None,
                        flavor_name: Optional[str] = None,
                        listening_port: Optional[float] = None,
                        containers: Optional[Sequence[InferenceDeploymentContainerArgs]] = None,
                        image: Optional[str] = None,
                        inference_deployment_id: Optional[str] = None,
                        logging: Optional[InferenceDeploymentLoggingArgs] = None,
                        description: Optional[str] = None,
                        credentials_name: Optional[str] = None,
                        auth_enabled: Optional[bool] = None,
                        command: Optional[str] = None,
                        liveness_probe: Optional[InferenceDeploymentLivenessProbeArgs] = None,
                        envs: Optional[Mapping[str, str]] = None,
                        name: Optional[str] = None,
                        project_id: Optional[float] = None,
                        project_name: Optional[str] = None,
                        readiness_probe: Optional[InferenceDeploymentReadinessProbeArgs] = None,
                        startup_probe: Optional[InferenceDeploymentStartupProbeArgs] = None,
                        timeout: Optional[float] = None)
func NewInferenceDeployment(ctx *Context, name string, args InferenceDeploymentArgs, opts ...ResourceOption) (*InferenceDeployment, error)
public InferenceDeployment(string name, InferenceDeploymentArgs args, CustomResourceOptions? opts = null)
public InferenceDeployment(String name, InferenceDeploymentArgs args)
public InferenceDeployment(String name, InferenceDeploymentArgs args, CustomResourceOptions options)
type: gcore:InferenceDeployment
properties: # The arguments to resource properties.
options: # Bag of options to control resource's behavior.

Parameters

name This property is required. string
The unique name of the resource.
args This property is required. InferenceDeploymentArgs
The arguments to resource properties.
opts CustomResourceOptions
Bag of options to control resource's behavior.
resource_name This property is required. str
The unique name of the resource.
args This property is required. InferenceDeploymentArgs
The arguments to resource properties.
opts ResourceOptions
Bag of options to control resource's behavior.
ctx Context
Context object for the current deployment.
name This property is required. string
The unique name of the resource.
args This property is required. InferenceDeploymentArgs
The arguments to resource properties.
opts ResourceOption
Bag of options to control resource's behavior.
name This property is required. string
The unique name of the resource.
args This property is required. InferenceDeploymentArgs
The arguments to resource properties.
opts CustomResourceOptions
Bag of options to control resource's behavior.
name This property is required. String
The unique name of the resource.
args This property is required. InferenceDeploymentArgs
The arguments to resource properties.
options CustomResourceOptions
Bag of options to control resource's behavior.

Constructor example

The following reference example uses placeholder values for all input properties.

// Reference constructor example: every input property of
// Gcore.InferenceDeployment set to a placeholder value.
var inferenceDeploymentResource = new Gcore.InferenceDeployment("inferenceDeploymentResource", new()
{
    FlavorName = "string",
    ListeningPort = 0,
    Containers = new[]
    {
        new Gcore.Inputs.InferenceDeploymentContainerArgs
        {
            ScaleMax = 0,
            CooldownPeriod = 0,
            ScaleMin = 0,
            RegionId = 0,
            TriggersHttpRate = 0,
            TriggersSqsActivationQueueLength = 0,
            TotalContainers = 0,
            TriggersCpuThreshold = 0,
            TriggersGpuMemoryThreshold = 0,
            TriggersGpuUtilizationThreshold = 0,
            PollingInterval = 0,
            TriggersHttpWindow = 0,
            TriggersMemoryThreshold = 0,
            ReadyContainers = 0,
            TriggersSqsAwsEndpoint = "string",
            TriggersSqsAwsRegion = "string",
            TriggersSqsQueueLength = 0,
            TriggersSqsQueueUrl = "string",
            TriggersSqsScaleOnDelayed = false,
            TriggersSqsScaleOnFlight = false,
            TriggersSqsSecretName = "string",
        },
    },
    Image = "string",
    InferenceDeploymentId = "string",
    Logging = new Gcore.Inputs.InferenceDeploymentLoggingArgs
    {
        DestinationRegionId = 0,
        Enabled = false,
        RetentionPolicyPeriod = 0,
        TopicName = "string",
    },
    Description = "string",
    CredentialsName = "string",
    AuthEnabled = false,
    Command = "string",
    LivenessProbe = new Gcore.Inputs.InferenceDeploymentLivenessProbeArgs
    {
        Enabled = false,
        ExecCommand = "string",
        FailureThreshold = 0,
        HttpGetHeaders = 
        {
            { "string", "string" },
        },
        HttpGetHost = "string",
        HttpGetPath = "string",
        HttpGetPort = 0,
        HttpGetSchema = "string",
        InitialDelaySeconds = 0,
        PeriodSeconds = 0,
        SuccessThreshold = 0,
        TcpSocketPort = 0,
        TimeoutSeconds = 0,
    },
    Envs = 
    {
        { "string", "string" },
    },
    Name = "string",
    ProjectId = 0,
    ProjectName = "string",
    ReadinessProbe = new Gcore.Inputs.InferenceDeploymentReadinessProbeArgs
    {
        Enabled = false,
        ExecCommand = "string",
        FailureThreshold = 0,
        HttpGetHeaders = 
        {
            { "string", "string" },
        },
        HttpGetHost = "string",
        HttpGetPath = "string",
        HttpGetPort = 0,
        HttpGetSchema = "string",
        InitialDelaySeconds = 0,
        PeriodSeconds = 0,
        SuccessThreshold = 0,
        TcpSocketPort = 0,
        TimeoutSeconds = 0,
    },
    StartupProbe = new Gcore.Inputs.InferenceDeploymentStartupProbeArgs
    {
        Enabled = false,
        ExecCommand = "string",
        FailureThreshold = 0,
        HttpGetHeaders = 
        {
            { "string", "string" },
        },
        HttpGetHost = "string",
        HttpGetPath = "string",
        HttpGetPort = 0,
        HttpGetSchema = "string",
        InitialDelaySeconds = 0,
        PeriodSeconds = 0,
        SuccessThreshold = 0,
        TcpSocketPort = 0,
        TimeoutSeconds = 0,
    },
    Timeout = 0,
});
Copy
// Reference constructor example: every input property of
// gcore.InferenceDeployment set to a placeholder value.
// Fixes the generated snippet, which dropped the "gcore" package
// qualifier on the nested input types (e.g. ".InferenceDeploymentContainerArray"
// instead of "gcore.InferenceDeploymentContainerArray") and lost gofmt
// indentation, making it invalid Go.
example, err := gcore.NewInferenceDeployment(ctx, "inferenceDeploymentResource", &gcore.InferenceDeploymentArgs{
	FlavorName:    pulumi.String("string"),
	ListeningPort: pulumi.Float64(0),
	Containers: gcore.InferenceDeploymentContainerArray{
		&gcore.InferenceDeploymentContainerArgs{
			ScaleMax:                         pulumi.Float64(0),
			CooldownPeriod:                   pulumi.Float64(0),
			ScaleMin:                         pulumi.Float64(0),
			RegionId:                         pulumi.Float64(0),
			TriggersHttpRate:                 pulumi.Float64(0),
			TriggersSqsActivationQueueLength: pulumi.Float64(0),
			TotalContainers:                  pulumi.Float64(0),
			TriggersCpuThreshold:             pulumi.Float64(0),
			TriggersGpuMemoryThreshold:       pulumi.Float64(0),
			TriggersGpuUtilizationThreshold:  pulumi.Float64(0),
			PollingInterval:                  pulumi.Float64(0),
			TriggersHttpWindow:               pulumi.Float64(0),
			TriggersMemoryThreshold:          pulumi.Float64(0),
			ReadyContainers:                  pulumi.Float64(0),
			TriggersSqsAwsEndpoint:           pulumi.String("string"),
			TriggersSqsAwsRegion:             pulumi.String("string"),
			TriggersSqsQueueLength:           pulumi.Float64(0),
			TriggersSqsQueueUrl:              pulumi.String("string"),
			TriggersSqsScaleOnDelayed:        pulumi.Bool(false),
			TriggersSqsScaleOnFlight:         pulumi.Bool(false),
			TriggersSqsSecretName:            pulumi.String("string"),
		},
	},
	Image:                 pulumi.String("string"),
	InferenceDeploymentId: pulumi.String("string"),
	Logging: &gcore.InferenceDeploymentLoggingArgs{
		DestinationRegionId:   pulumi.Float64(0),
		Enabled:               pulumi.Bool(false),
		RetentionPolicyPeriod: pulumi.Float64(0),
		TopicName:             pulumi.String("string"),
	},
	Description:     pulumi.String("string"),
	CredentialsName: pulumi.String("string"),
	AuthEnabled:     pulumi.Bool(false),
	Command:         pulumi.String("string"),
	LivenessProbe: &gcore.InferenceDeploymentLivenessProbeArgs{
		Enabled:          pulumi.Bool(false),
		ExecCommand:      pulumi.String("string"),
		FailureThreshold: pulumi.Float64(0),
		HttpGetHeaders: pulumi.StringMap{
			"string": pulumi.String("string"),
		},
		HttpGetHost:         pulumi.String("string"),
		HttpGetPath:         pulumi.String("string"),
		HttpGetPort:         pulumi.Float64(0),
		HttpGetSchema:       pulumi.String("string"),
		InitialDelaySeconds: pulumi.Float64(0),
		PeriodSeconds:       pulumi.Float64(0),
		SuccessThreshold:    pulumi.Float64(0),
		TcpSocketPort:       pulumi.Float64(0),
		TimeoutSeconds:      pulumi.Float64(0),
	},
	Envs: pulumi.StringMap{
		"string": pulumi.String("string"),
	},
	Name:        pulumi.String("string"),
	ProjectId:   pulumi.Float64(0),
	ProjectName: pulumi.String("string"),
	ReadinessProbe: &gcore.InferenceDeploymentReadinessProbeArgs{
		Enabled:          pulumi.Bool(false),
		ExecCommand:      pulumi.String("string"),
		FailureThreshold: pulumi.Float64(0),
		HttpGetHeaders: pulumi.StringMap{
			"string": pulumi.String("string"),
		},
		HttpGetHost:         pulumi.String("string"),
		HttpGetPath:         pulumi.String("string"),
		HttpGetPort:         pulumi.Float64(0),
		HttpGetSchema:       pulumi.String("string"),
		InitialDelaySeconds: pulumi.Float64(0),
		PeriodSeconds:       pulumi.Float64(0),
		SuccessThreshold:    pulumi.Float64(0),
		TcpSocketPort:       pulumi.Float64(0),
		TimeoutSeconds:      pulumi.Float64(0),
	},
	StartupProbe: &gcore.InferenceDeploymentStartupProbeArgs{
		Enabled:          pulumi.Bool(false),
		ExecCommand:      pulumi.String("string"),
		FailureThreshold: pulumi.Float64(0),
		HttpGetHeaders: pulumi.StringMap{
			"string": pulumi.String("string"),
		},
		HttpGetHost:         pulumi.String("string"),
		HttpGetPath:         pulumi.String("string"),
		HttpGetPort:         pulumi.Float64(0),
		HttpGetSchema:       pulumi.String("string"),
		InitialDelaySeconds: pulumi.Float64(0),
		PeriodSeconds:       pulumi.Float64(0),
		SuccessThreshold:    pulumi.Float64(0),
		TcpSocketPort:       pulumi.Float64(0),
		TimeoutSeconds:      pulumi.Float64(0),
	},
	Timeout: pulumi.Float64(0),
})
Copy
var inferenceDeploymentResource = new InferenceDeployment("inferenceDeploymentResource", InferenceDeploymentArgs.builder()
    .flavorName("string")
    .listeningPort(0)
    .containers(InferenceDeploymentContainerArgs.builder()
        .scaleMax(0)
        .cooldownPeriod(0)
        .scaleMin(0)
        .regionId(0)
        .triggersHttpRate(0)
        .triggersSqsActivationQueueLength(0)
        .totalContainers(0)
        .triggersCpuThreshold(0)
        .triggersGpuMemoryThreshold(0)
        .triggersGpuUtilizationThreshold(0)
        .pollingInterval(0)
        .triggersHttpWindow(0)
        .triggersMemoryThreshold(0)
        .readyContainers(0)
        .triggersSqsAwsEndpoint("string")
        .triggersSqsAwsRegion("string")
        .triggersSqsQueueLength(0)
        .triggersSqsQueueUrl("string")
        .triggersSqsScaleOnDelayed(false)
        .triggersSqsScaleOnFlight(false)
        .triggersSqsSecretName("string")
        .build())
    .image("string")
    .inferenceDeploymentId("string")
    .logging(InferenceDeploymentLoggingArgs.builder()
        .destinationRegionId(0)
        .enabled(false)
        .retentionPolicyPeriod(0)
        .topicName("string")
        .build())
    .description("string")
    .credentialsName("string")
    .authEnabled(false)
    .command("string")
    .livenessProbe(InferenceDeploymentLivenessProbeArgs.builder()
        .enabled(false)
        .execCommand("string")
        .failureThreshold(0)
        .httpGetHeaders(Map.of("string", "string"))
        .httpGetHost("string")
        .httpGetPath("string")
        .httpGetPort(0)
        .httpGetSchema("string")
        .initialDelaySeconds(0)
        .periodSeconds(0)
        .successThreshold(0)
        .tcpSocketPort(0)
        .timeoutSeconds(0)
        .build())
    .envs(Map.of("string", "string"))
    .name("string")
    .projectId(0)
    .projectName("string")
    .readinessProbe(InferenceDeploymentReadinessProbeArgs.builder()
        .enabled(false)
        .execCommand("string")
        .failureThreshold(0)
        .httpGetHeaders(Map.of("string", "string"))
        .httpGetHost("string")
        .httpGetPath("string")
        .httpGetPort(0)
        .httpGetSchema("string")
        .initialDelaySeconds(0)
        .periodSeconds(0)
        .successThreshold(0)
        .tcpSocketPort(0)
        .timeoutSeconds(0)
        .build())
    .startupProbe(InferenceDeploymentStartupProbeArgs.builder()
        .enabled(false)
        .execCommand("string")
        .failureThreshold(0)
        .httpGetHeaders(Map.of("string", "string"))
        .httpGetHost("string")
        .httpGetPath("string")
        .httpGetPort(0)
        .httpGetSchema("string")
        .initialDelaySeconds(0)
        .periodSeconds(0)
        .successThreshold(0)
        .tcpSocketPort(0)
        .timeoutSeconds(0)
        .build())
    .timeout(0)
    .build());
Copy
inference_deployment_resource = gcore.InferenceDeployment("inferenceDeploymentResource",
    flavor_name="string",
    listening_port=0,
    containers=[{
        "scale_max": 0,
        "cooldown_period": 0,
        "scale_min": 0,
        "region_id": 0,
        "triggers_http_rate": 0,
        "triggers_sqs_activation_queue_length": 0,
        "total_containers": 0,
        "triggers_cpu_threshold": 0,
        "triggers_gpu_memory_threshold": 0,
        "triggers_gpu_utilization_threshold": 0,
        "polling_interval": 0,
        "triggers_http_window": 0,
        "triggers_memory_threshold": 0,
        "ready_containers": 0,
        "triggers_sqs_aws_endpoint": "string",
        "triggers_sqs_aws_region": "string",
        "triggers_sqs_queue_length": 0,
        "triggers_sqs_queue_url": "string",
        "triggers_sqs_scale_on_delayed": False,
        "triggers_sqs_scale_on_flight": False,
        "triggers_sqs_secret_name": "string",
    }],
    image="string",
    inference_deployment_id="string",
    logging={
        "destination_region_id": 0,
        "enabled": False,
        "retention_policy_period": 0,
        "topic_name": "string",
    },
    description="string",
    credentials_name="string",
    auth_enabled=False,
    command="string",
    liveness_probe={
        "enabled": False,
        "exec_command": "string",
        "failure_threshold": 0,
        "http_get_headers": {
            "string": "string",
        },
        "http_get_host": "string",
        "http_get_path": "string",
        "http_get_port": 0,
        "http_get_schema": "string",
        "initial_delay_seconds": 0,
        "period_seconds": 0,
        "success_threshold": 0,
        "tcp_socket_port": 0,
        "timeout_seconds": 0,
    },
    envs={
        "string": "string",
    },
    name="string",
    project_id=0,
    project_name="string",
    readiness_probe={
        "enabled": False,
        "exec_command": "string",
        "failure_threshold": 0,
        "http_get_headers": {
            "string": "string",
        },
        "http_get_host": "string",
        "http_get_path": "string",
        "http_get_port": 0,
        "http_get_schema": "string",
        "initial_delay_seconds": 0,
        "period_seconds": 0,
        "success_threshold": 0,
        "tcp_socket_port": 0,
        "timeout_seconds": 0,
    },
    startup_probe={
        "enabled": False,
        "exec_command": "string",
        "failure_threshold": 0,
        "http_get_headers": {
            "string": "string",
        },
        "http_get_host": "string",
        "http_get_path": "string",
        "http_get_port": 0,
        "http_get_schema": "string",
        "initial_delay_seconds": 0,
        "period_seconds": 0,
        "success_threshold": 0,
        "tcp_socket_port": 0,
        "timeout_seconds": 0,
    },
    timeout=0)
Copy
const inferenceDeploymentResource = new gcore.InferenceDeployment("inferenceDeploymentResource", {
    flavorName: "string",
    listeningPort: 0,
    containers: [{
        scaleMax: 0,
        cooldownPeriod: 0,
        scaleMin: 0,
        regionId: 0,
        triggersHttpRate: 0,
        triggersSqsActivationQueueLength: 0,
        totalContainers: 0,
        triggersCpuThreshold: 0,
        triggersGpuMemoryThreshold: 0,
        triggersGpuUtilizationThreshold: 0,
        pollingInterval: 0,
        triggersHttpWindow: 0,
        triggersMemoryThreshold: 0,
        readyContainers: 0,
        triggersSqsAwsEndpoint: "string",
        triggersSqsAwsRegion: "string",
        triggersSqsQueueLength: 0,
        triggersSqsQueueUrl: "string",
        triggersSqsScaleOnDelayed: false,
        triggersSqsScaleOnFlight: false,
        triggersSqsSecretName: "string",
    }],
    image: "string",
    inferenceDeploymentId: "string",
    logging: {
        destinationRegionId: 0,
        enabled: false,
        retentionPolicyPeriod: 0,
        topicName: "string",
    },
    description: "string",
    credentialsName: "string",
    authEnabled: false,
    command: "string",
    livenessProbe: {
        enabled: false,
        execCommand: "string",
        failureThreshold: 0,
        httpGetHeaders: {
            string: "string",
        },
        httpGetHost: "string",
        httpGetPath: "string",
        httpGetPort: 0,
        httpGetSchema: "string",
        initialDelaySeconds: 0,
        periodSeconds: 0,
        successThreshold: 0,
        tcpSocketPort: 0,
        timeoutSeconds: 0,
    },
    envs: {
        string: "string",
    },
    name: "string",
    projectId: 0,
    projectName: "string",
    readinessProbe: {
        enabled: false,
        execCommand: "string",
        failureThreshold: 0,
        httpGetHeaders: {
            string: "string",
        },
        httpGetHost: "string",
        httpGetPath: "string",
        httpGetPort: 0,
        httpGetSchema: "string",
        initialDelaySeconds: 0,
        periodSeconds: 0,
        successThreshold: 0,
        tcpSocketPort: 0,
        timeoutSeconds: 0,
    },
    startupProbe: {
        enabled: false,
        execCommand: "string",
        failureThreshold: 0,
        httpGetHeaders: {
            string: "string",
        },
        httpGetHost: "string",
        httpGetPath: "string",
        httpGetPort: 0,
        httpGetSchema: "string",
        initialDelaySeconds: 0,
        periodSeconds: 0,
        successThreshold: 0,
        tcpSocketPort: 0,
        timeoutSeconds: 0,
    },
    timeout: 0,
});
Copy
type: gcore:InferenceDeployment
properties:
    authEnabled: false
    command: string
    containers:
        - cooldownPeriod: 0
          pollingInterval: 0
          readyContainers: 0
          regionId: 0
          scaleMax: 0
          scaleMin: 0
          totalContainers: 0
          triggersCpuThreshold: 0
          triggersGpuMemoryThreshold: 0
          triggersGpuUtilizationThreshold: 0
          triggersHttpRate: 0
          triggersHttpWindow: 0
          triggersMemoryThreshold: 0
          triggersSqsActivationQueueLength: 0
          triggersSqsAwsEndpoint: string
          triggersSqsAwsRegion: string
          triggersSqsQueueLength: 0
          triggersSqsQueueUrl: string
          triggersSqsScaleOnDelayed: false
          triggersSqsScaleOnFlight: false
          triggersSqsSecretName: string
    credentialsName: string
    description: string
    envs:
        string: string
    flavorName: string
    image: string
    inferenceDeploymentId: string
    listeningPort: 0
    livenessProbe:
        enabled: false
        execCommand: string
        failureThreshold: 0
        httpGetHeaders:
            string: string
        httpGetHost: string
        httpGetPath: string
        httpGetPort: 0
        httpGetSchema: string
        initialDelaySeconds: 0
        periodSeconds: 0
        successThreshold: 0
        tcpSocketPort: 0
        timeoutSeconds: 0
    logging:
        destinationRegionId: 0
        enabled: false
        retentionPolicyPeriod: 0
        topicName: string
    name: string
    projectId: 0
    projectName: string
    readinessProbe:
        enabled: false
        execCommand: string
        failureThreshold: 0
        httpGetHeaders:
            string: string
        httpGetHost: string
        httpGetPath: string
        httpGetPort: 0
        httpGetSchema: string
        initialDelaySeconds: 0
        periodSeconds: 0
        successThreshold: 0
        tcpSocketPort: 0
        timeoutSeconds: 0
    startupProbe:
        enabled: false
        execCommand: string
        failureThreshold: 0
        httpGetHeaders:
            string: string
        httpGetHost: string
        httpGetPath: string
        httpGetPort: 0
        httpGetSchema: string
        initialDelaySeconds: 0
        periodSeconds: 0
        successThreshold: 0
        tcpSocketPort: 0
        timeoutSeconds: 0
    timeout: 0
Copy

InferenceDeployment Resource Properties

To learn more about resource properties and how to use them, see Inputs and Outputs in the Architecture and Concepts docs.

Inputs

In Python, inputs that are objects can be passed either as argument classes or as dictionary literals.

The InferenceDeployment resource accepts the following input properties:

Containers This property is required. List<InferenceDeploymentContainer>
A required list of container definitions. Each entry represents a container configuration, and at least one container must be specified. See the nested schema below for further details.
FlavorName This property is required. string
Specifies the resource flavor for the container, determining its allocated CPU, memory, and potentially GPU resources.
Image This property is required. string
The container image to be used for deployment. This should be a valid image reference, such as a public or private Docker image (registry.example.com/my-image:latest). Note: If the image is hosted in a private registry, you must specify credentials_name to provide authentication details.
ListeningPort This property is required. double
The port on which the container will accept incoming traffic. This should match the port your application is configured to listen on within the container.
AuthEnabled bool
Set to true to enable API key authentication for the inference instance.
Command string
Command to be executed when running a container from an image.
CredentialsName string
Required if using a private image registry. Specifies the name of the credentials to authenticate with the registry where the container image is stored.
Description string
Envs Dictionary<string, string>
Environment variables for the inference instance.
InferenceDeploymentId string
The ID of this resource.
LivenessProbe InferenceDeploymentLivenessProbe
Logging InferenceDeploymentLogging
Name string
The name of the deployment. This should be unique within the scope of the project.
ProjectId double
ProjectName string
ReadinessProbe InferenceDeploymentReadinessProbe
StartupProbe InferenceDeploymentStartupProbe
Timeout double
Containers This property is required. []InferenceDeploymentContainerArgs
A required list of container definitions. Each entry represents a container configuration, and at least one container must be specified. See the nested schema below for further details.
FlavorName This property is required. string
Specifies the resource flavor for the container, determining its allocated CPU, memory, and potentially GPU resources.
Image This property is required. string
The container image to be used for deployment. This should be a valid image reference, such as a public or private Docker image (registry.example.com/my-image:latest). Note: If the image is hosted in a private registry, you must specify credentials_name to provide authentication details.
ListeningPort This property is required. float64
The port on which the container will accept incoming traffic. This should match the port your application is configured to listen on within the container.
AuthEnabled bool
Set to true to enable API key authentication for the inference instance.
Command string
Command to be executed when running a container from an image.
CredentialsName string
Required if using a private image registry. Specifies the name of the credentials to authenticate with the registry where the container image is stored.
Description string
Envs map[string]string
Environment variables for the inference instance.
InferenceDeploymentId string
The ID of this resource.
LivenessProbe InferenceDeploymentLivenessProbeArgs
Logging InferenceDeploymentLoggingArgs
Name string
The name of the deployment. This should be unique within the scope of the project.
ProjectId float64
ProjectName string
ReadinessProbe InferenceDeploymentReadinessProbeArgs
StartupProbe InferenceDeploymentStartupProbeArgs
Timeout float64
containers This property is required. List<InferenceDeploymentContainer>
A required list of container definitions. Each entry represents a container configuration, and at least one container must be specified. See the nested schema below for further details.
flavorName This property is required. String
Specifies the resource flavor for the container, determining its allocated CPU, memory, and potentially GPU resources.
image This property is required. String
The container image to be used for deployment. This should be a valid image reference, such as a public or private Docker image (registry.example.com/my-image:latest). Note: If the image is hosted in a private registry, you must specify credentials_name to provide authentication details.
listeningPort This property is required. Double
The port on which the container will accept incoming traffic. This should match the port your application is configured to listen on within the container.
authEnabled Boolean
Set to true to enable API key authentication for the inference instance.
command String
Command to be executed when running a container from an image.
credentialsName String
Required if using a private image registry. Specifies the name of the credentials to authenticate with the registry where the container image is stored.
description String
envs Map<String,String>
Environment variables for the inference instance.
inferenceDeploymentId String
The ID of this resource.
livenessProbe InferenceDeploymentLivenessProbe
logging InferenceDeploymentLogging
name String
The name of the deployment. This should be unique within the scope of the project.
projectId Double
projectName String
readinessProbe InferenceDeploymentReadinessProbe
startupProbe InferenceDeploymentStartupProbe
timeout Double
containers This property is required. InferenceDeploymentContainer[]
A required list of container definitions. Each entry represents a container configuration, and at least one container must be specified. See the nested schema below for further details.
flavorName This property is required. string
Specifies the resource flavor for the container, determining its allocated CPU, memory, and potentially GPU resources.
image This property is required. string
The container image to be used for deployment. This should be a valid image reference, such as a public or private Docker image (registry.example.com/my-image:latest). Note: If the image is hosted in a private registry, you must specify credentials_name to provide authentication details.
listeningPort This property is required. number
The port on which the container will accept incoming traffic. This should match the port your application is configured to listen on within the container.
authEnabled boolean
Set to true to enable API key authentication for the inference instance.
command string
Command to be executed when running a container from an image.
credentialsName string
Required if using a private image registry. Specifies the name of the credentials to authenticate with the registry where the container image is stored.
description string
envs {[key: string]: string}
Environment variables for the inference instance.
inferenceDeploymentId string
The ID of this resource.
livenessProbe InferenceDeploymentLivenessProbe
logging InferenceDeploymentLogging
name string
The name of the deployment. This should be unique within the scope of the project.
projectId number
projectName string
readinessProbe InferenceDeploymentReadinessProbe
startupProbe InferenceDeploymentStartupProbe
timeout number
containers This property is required. Sequence[InferenceDeploymentContainerArgs]
A required list of container definitions. Each entry represents a container configuration, and at least one container must be specified. See the nested schema below for further details.
flavor_name This property is required. str
Specifies the resource flavor for the container, determining its allocated CPU, memory, and potentially GPU resources.
image This property is required. str
The container image to be used for deployment. This should be a valid image reference, such as a public or private Docker image (registry.example.com/my-image:latest). Note: If the image is hosted in a private registry, you must specify credentials_name to provide authentication details.
listening_port This property is required. float
The port on which the container will accept incoming traffic. This should match the port your application is configured to listen on within the container.
auth_enabled bool
Set to true to enable API key authentication for the inference instance.
command str
Command to be executed when running a container from an image.
credentials_name str
Required if using a private image registry. Specifies the name of the credentials to authenticate with the registry where the container image is stored.
description str
envs Mapping[str, str]
Environment variables for the inference instance.
inference_deployment_id str
The ID of this resource.
liveness_probe InferenceDeploymentLivenessProbeArgs
logging InferenceDeploymentLoggingArgs
name str
The name of the deployment. This should be unique within the scope of the project.
project_id float
project_name str
readiness_probe InferenceDeploymentReadinessProbeArgs
startup_probe InferenceDeploymentStartupProbeArgs
timeout float
containers This property is required. List<Property Map>
A required list of container definitions. Each entry represents a container configuration, and at least one container must be specified. See the nested schema below for further details.
flavorName This property is required. String
Specifies the resource flavor for the container, determining its allocated CPU, memory, and potentially GPU resources.
image This property is required. String
The container image to be used for deployment. This should be a valid image reference, such as a public or private Docker image (registry.example.com/my-image:latest). Note: If the image is hosted in a private registry, you must specify credentials_name to provide authentication details.
listeningPort This property is required. Number
The port on which the container will accept incoming traffic. This should match the port your application is configured to listen on within the container.
authEnabled Boolean
Set to true to enable API key authentication for the inference instance.
command String
Command to be executed when running a container from an image.
credentialsName String
Required if using a private image registry. Specifies the name of the credentials to authenticate with the registry where the container image is stored.
description String
envs Map<String>
Environment variables for the inference instance.
inferenceDeploymentId String
The ID of this resource.
livenessProbe Property Map
logging Property Map
name String
The name of the deployment. This should be unique within the scope of the project.
projectId Number
projectName String
readinessProbe Property Map
startupProbe Property Map
timeout Number

Outputs

All input properties are implicitly available as output properties. Additionally, the InferenceDeployment resource produces the following output properties:

Address string
CreatedAt string
Datetime when the inference deployment was created. The format is 2025-12-28T19:14:44.180394
Id string
The provider-assigned unique ID for this managed resource.
Status string
Address string
CreatedAt string
Datetime when the inference deployment was created. The format is 2025-12-28T19:14:44.180394
Id string
The provider-assigned unique ID for this managed resource.
Status string
address String
createdAt String
Datetime when the inference deployment was created. The format is 2025-12-28T19:14:44.180394
id String
The provider-assigned unique ID for this managed resource.
status String
address string
createdAt string
Datetime when the inference deployment was created. The format is 2025-12-28T19:14:44.180394
id string
The provider-assigned unique ID for this managed resource.
status string
address str
created_at str
Datetime when the inference deployment was created. The format is 2025-12-28T19:14:44.180394
id str
The provider-assigned unique ID for this managed resource.
status str
address String
createdAt String
Datetime when the inference deployment was created. The format is 2025-12-28T19:14:44.180394
id String
The provider-assigned unique ID for this managed resource.
status String

Look up Existing InferenceDeployment Resource

Get an existing InferenceDeployment resource’s state with the given name, ID, and optional extra properties used to qualify the lookup.

public static get(name: string, id: Input<ID>, state?: InferenceDeploymentState, opts?: CustomResourceOptions): InferenceDeployment
@staticmethod
def get(resource_name: str,
        id: str,
        opts: Optional[ResourceOptions] = None,
        address: Optional[str] = None,
        auth_enabled: Optional[bool] = None,
        command: Optional[str] = None,
        containers: Optional[Sequence[InferenceDeploymentContainerArgs]] = None,
        created_at: Optional[str] = None,
        credentials_name: Optional[str] = None,
        description: Optional[str] = None,
        envs: Optional[Mapping[str, str]] = None,
        flavor_name: Optional[str] = None,
        image: Optional[str] = None,
        inference_deployment_id: Optional[str] = None,
        listening_port: Optional[float] = None,
        liveness_probe: Optional[InferenceDeploymentLivenessProbeArgs] = None,
        logging: Optional[InferenceDeploymentLoggingArgs] = None,
        name: Optional[str] = None,
        project_id: Optional[float] = None,
        project_name: Optional[str] = None,
        readiness_probe: Optional[InferenceDeploymentReadinessProbeArgs] = None,
        startup_probe: Optional[InferenceDeploymentStartupProbeArgs] = None,
        status: Optional[str] = None,
        timeout: Optional[float] = None) -> InferenceDeployment
func GetInferenceDeployment(ctx *Context, name string, id IDInput, state *InferenceDeploymentState, opts ...ResourceOption) (*InferenceDeployment, error)
public static InferenceDeployment Get(string name, Input<string> id, InferenceDeploymentState? state, CustomResourceOptions? opts = null)
public static InferenceDeployment get(String name, Output<String> id, InferenceDeploymentState state, CustomResourceOptions options)
resources:
  _:
    type: gcore:InferenceDeployment
    get:
      id: ${id}
name This property is required.
The unique name of the resulting resource.
id This property is required.
The unique provider ID of the resource to lookup.
state
Any extra arguments used during the lookup.
opts
A bag of options that control this resource's behavior.
resource_name This property is required.
The unique name of the resulting resource.
id This property is required.
The unique provider ID of the resource to lookup.
name This property is required.
The unique name of the resulting resource.
id This property is required.
The unique provider ID of the resource to lookup.
state
Any extra arguments used during the lookup.
opts
A bag of options that control this resource's behavior.
name This property is required.
The unique name of the resulting resource.
id This property is required.
The unique provider ID of the resource to lookup.
state
Any extra arguments used during the lookup.
opts
A bag of options that control this resource's behavior.
name This property is required.
The unique name of the resulting resource.
id This property is required.
The unique provider ID of the resource to lookup.
state
Any extra arguments used during the lookup.
opts
A bag of options that control this resource's behavior.
The following state arguments are supported:
Address string
AuthEnabled bool
Set to true to enable API key authentication for the inference instance.
Command string
Command to be executed when running a container from an image.
Containers List<InferenceDeploymentContainer>
A required list of container definitions. Each entry represents a container configuration, and at least one container must be specified. See the nested schema below for further details.
CreatedAt string
Datetime when the inference deployment was created. The format is 2025-12-28T19:14:44.180394
CredentialsName string
Required if using a private image registry. Specifies the name of the credentials to authenticate with the registry where the container image is stored.
Description string
Envs Dictionary<string, string>
Environment variables for the inference instance.
FlavorName string
Specifies the resource flavor for the container, determining its allocated CPU, memory, and potentially GPU resources.
Image string
The container image to be used for deployment. This should be a valid image reference, such as a public or private Docker image (registry.example.com/my-image:latest). Note: If the image is hosted in a private registry, you must specify credentials_name to provide authentication details.
InferenceDeploymentId string
The ID of this resource.
ListeningPort double
The port on which the container will accept incoming traffic. This should match the port your application is configured to listen on within the container.
LivenessProbe InferenceDeploymentLivenessProbe
Logging InferenceDeploymentLogging
Name string
The name of the deployment. This should be unique within the scope of the project.
ProjectId double
ProjectName string
ReadinessProbe InferenceDeploymentReadinessProbe
StartupProbe InferenceDeploymentStartupProbe
Status string
Timeout double
Address string
AuthEnabled bool
Set to true to enable API key authentication for the inference instance.
Command string
Command to be executed when running a container from an image.
Containers []InferenceDeploymentContainerArgs
A required list of container definitions. Each entry represents a container configuration, and at least one container must be specified. See the nested schema below for further details.
CreatedAt string
Datetime when the inference deployment was created. The format is 2025-12-28T19:14:44.180394
CredentialsName string
Required if using a private image registry. Specifies the name of the credentials to authenticate with the registry where the container image is stored.
Description string
Envs map[string]string
Environment variables for the inference instance.
FlavorName string
Specifies the resource flavor for the container, determining its allocated CPU, memory, and potentially GPU resources.
Image string
The container image to be used for deployment. This should be a valid image reference, such as a public or private Docker image (registry.example.com/my-image:latest). Note: If the image is hosted in a private registry, you must specify credentials_name to provide authentication details.
InferenceDeploymentId string
The ID of this resource.
ListeningPort float64
The port on which the container will accept incoming traffic. This should match the port your application is configured to listen on within the container.
LivenessProbe InferenceDeploymentLivenessProbeArgs
Logging InferenceDeploymentLoggingArgs
Name string
The name of the deployment. This should be unique within the scope of the project.
ProjectId float64
ProjectName string
ReadinessProbe InferenceDeploymentReadinessProbeArgs
StartupProbe InferenceDeploymentStartupProbeArgs
Status string
Timeout float64
address String
authEnabled Boolean
Set to true to enable API key authentication for the inference instance.
command String
Command to be executed when running a container from an image.
containers List<InferenceDeploymentContainer>
A required list of container definitions. Each entry represents a container configuration, and at least one container must be specified. See the nested schema below for further details.
createdAt String
Datetime when the inference deployment was created. The format is 2025-12-28T19:14:44.180394
credentialsName String
Required if using a private image registry. Specifies the name of the credentials to authenticate with the registry where the container image is stored.
description String
envs Map<String,String>
Environment variables for the inference instance.
flavorName String
Specifies the resource flavor for the container, determining its allocated CPU, memory, and potentially GPU resources.
image String
The container image to be used for deployment. This should be a valid image reference, such as a public or private Docker image (registry.example.com/my-image:latest). Note: If the image is hosted in a private registry, you must specify credentials_name to provide authentication details.
inferenceDeploymentId String
The ID of this resource.
listeningPort Double
The port on which the container will accept incoming traffic. This should match the port your application is configured to listen on within the container.
livenessProbe InferenceDeploymentLivenessProbe
logging InferenceDeploymentLogging
name String
The name of the deployment. This should be unique within the scope of the project.
projectId Double
projectName String
readinessProbe InferenceDeploymentReadinessProbe
startupProbe InferenceDeploymentStartupProbe
status String
timeout Double
address string
authEnabled boolean
Set to true to enable API key authentication for the inference instance.
command string
Command to be executed when running a container from an image.
containers InferenceDeploymentContainer[]
A required list of container definitions. Each entry represents a container configuration, and at least one container must be specified. See the nested schema below for further details.
createdAt string
Datetime when the inference deployment was created. The format is 2025-12-28T19:14:44.180394
credentialsName string
Required if using a private image registry. Specifies the name of the credentials to authenticate with the registry where the container image is stored.
description string
envs {[key: string]: string}
Environment variables for the inference instance.
flavorName string
Specifies the resource flavor for the container, determining its allocated CPU, memory, and potentially GPU resources.
image string
The container image to be used for deployment. This should be a valid image reference, such as a public or private Docker image (registry.example.com/my-image:latest). Note: If the image is hosted in a private registry, you must specify credentials_name to provide authentication details.
inferenceDeploymentId string
The ID of this resource.
listeningPort number
The port on which the container will accept incoming traffic. This should match the port your application is configured to listen on within the container.
livenessProbe InferenceDeploymentLivenessProbe
logging InferenceDeploymentLogging
name string
The name of the deployment. This should be unique within the scope of the project.
projectId number
projectName string
readinessProbe InferenceDeploymentReadinessProbe
startupProbe InferenceDeploymentStartupProbe
status string
timeout number
address str
auth_enabled bool
Set to true to enable API key authentication for the inference instance.
command str
Command to be executed when running a container from an image.
containers Sequence[InferenceDeploymentContainerArgs]
A required list of container definitions. Each entry represents a container configuration, and at least one container must be specified. See the nested schema below for further details.
created_at str
Datetime when the inference deployment was created. The format is 2025-12-28T19:14:44.180394
credentials_name str
Required if using a private image registry. Specifies the name of the credentials to authenticate with the registry where the container image is stored.
description str
envs Mapping[str, str]
Environment variables for the inference instance.
flavor_name str
Specifies the resource flavor for the container, determining its allocated CPU, memory, and potentially GPU resources.
image str
The container image to be used for deployment. This should be a valid image reference, such as a public or private Docker image (registry.example.com/my-image:latest). Note: If the image is hosted in a private registry, you must specify credentials_name to provide authentication details.
inference_deployment_id str
The ID of this resource.
listening_port float
The port on which the container will accept incoming traffic. This should match the port your application is configured to listen on within the container.
liveness_probe InferenceDeploymentLivenessProbeArgs
logging InferenceDeploymentLoggingArgs
name str
The name of the deployment. This should be unique within the scope of the project.
project_id float
project_name str
readiness_probe InferenceDeploymentReadinessProbeArgs
startup_probe InferenceDeploymentStartupProbeArgs
status str
timeout float
address String
authEnabled Boolean
Set to true to enable API key authentication for the inference instance.
command String
Command to be executed when running a container from an image.
containers List<Property Map>
A required list of container definitions. Each entry represents a container configuration, and at least one container must be specified. See the nested schema below for further details.
createdAt String
Datetime when the inference deployment was created. The format is 2025-12-28T19:14:44.180394
credentialsName String
Required if using a private image registry. Specifies the name of the credentials to authenticate with the registry where the container image is stored.
description String
envs Map<String>
Environment variables for the inference instance.
flavorName String
Specifies the resource flavor for the container, determining its allocated CPU, memory, and potentially GPU resources.
image String
The container image to be used for deployment. This should be a valid image reference, such as a public or private Docker image (registry.example.com/my-image:latest). Note: If the image is hosted in a private registry, you must specify credentials_name to provide authentication details.
inferenceDeploymentId String
The ID of this resource.
listeningPort Number
The port on which the container will accept incoming traffic. This should match the port your application is configured to listen on within the container.
livenessProbe Property Map
logging Property Map
name String
The name of the deployment. This should be unique within the scope of the project.
projectId Number
projectName String
readinessProbe Property Map
startupProbe Property Map
status String
timeout Number

Supporting Types

InferenceDeploymentContainer
, InferenceDeploymentContainerArgs

CooldownPeriod This property is required. double
Cooldown period between scaling actions in seconds
RegionId This property is required. double
Region id for the container
ScaleMax This property is required. double
Maximum scale for the container
ScaleMin This property is required. double
Minimum scale for the container. It can be set to 0, in which case the container will be downscaled to 0 when there is no load.
PollingInterval double
Polling interval for scaling triggers in seconds
ReadyContainers double
Status of the containers deployment. Number of ready instances
TotalContainers double
Status of the containers deployment. Total number of instances
TriggersCpuThreshold double
CPU trigger threshold configuration
TriggersGpuMemoryThreshold double
GPU memory trigger threshold configuration. Calculated by the DCGM_FI_DEV_MEM_COPY_UTIL metric
TriggersGpuUtilizationThreshold double
GPU utilization trigger threshold configuration. Calculated by the DCGM_FI_DEV_GPU_UTIL metric
TriggersHttpRate double
Request count per 'window' seconds for the http trigger. Required if you use http trigger
TriggersHttpWindow double
Time window for rate calculation in seconds. Required if you use http trigger
TriggersMemoryThreshold double
Memory trigger threshold configuration
TriggersSqsActivationQueueLength double
Number of messages for activation
TriggersSqsAwsEndpoint string
Custom AWS endpoint; leave empty to use the default AWS endpoint
TriggersSqsAwsRegion string
AWS region. Required if you use SQS trigger
TriggersSqsQueueLength double
Number of messages for one replica
TriggersSqsQueueUrl string
URL of the SQS queue. Required if you use SQS trigger
TriggersSqsScaleOnDelayed bool
Scale on delayed messages
TriggersSqsScaleOnFlight bool
Scale on in-flight messages
TriggersSqsSecretName string
Name of the secret with AWS credentials. Required if you use SQS trigger
CooldownPeriod This property is required. float64
Cooldown period between scaling actions in seconds
RegionId This property is required. float64
Region id for the container
ScaleMax This property is required. float64
Maximum scale for the container
ScaleMin This property is required. float64
Minimum scale for the container. It can be set to 0, in which case the container will be downscaled to 0 when there is no load.
PollingInterval float64
Polling interval for scaling triggers in seconds
ReadyContainers float64
Status of the containers deployment. Number of ready instances
TotalContainers float64
Status of the containers deployment. Total number of instances
TriggersCpuThreshold float64
CPU trigger threshold configuration
TriggersGpuMemoryThreshold float64
GPU memory trigger threshold configuration. Calculated by the DCGM_FI_DEV_MEM_COPY_UTIL metric
TriggersGpuUtilizationThreshold float64
GPU utilization trigger threshold configuration. Calculated by the DCGM_FI_DEV_GPU_UTIL metric
TriggersHttpRate float64
Request count per 'window' seconds for the http trigger. Required if you use http trigger
TriggersHttpWindow float64
Time window for rate calculation in seconds. Required if you use http trigger
TriggersMemoryThreshold float64
Memory trigger threshold configuration
TriggersSqsActivationQueueLength float64
Number of messages for activation
TriggersSqsAwsEndpoint string
Custom AWS endpoint; leave empty to use the default AWS endpoint
TriggersSqsAwsRegion string
AWS region. Required if you use SQS trigger
TriggersSqsQueueLength float64
Number of messages for one replica
TriggersSqsQueueUrl string
URL of the SQS queue. Required if you use SQS trigger
TriggersSqsScaleOnDelayed bool
Scale on delayed messages
TriggersSqsScaleOnFlight bool
Scale on in-flight messages
TriggersSqsSecretName string
Name of the secret with AWS credentials. Required if you use SQS trigger
cooldownPeriod This property is required. Double
Cooldown period between scaling actions in seconds
regionId This property is required. Double
Region id for the container
scaleMax This property is required. Double
Maximum scale for the container
scaleMin This property is required. Double
Minimum scale for the container. It can be set to 0, in which case the container will be downscaled to 0 when there is no load.
pollingInterval Double
Polling interval for scaling triggers in seconds
readyContainers Double
Status of the containers deployment. Number of ready instances
totalContainers Double
Status of the containers deployment. Total number of instances
triggersCpuThreshold Double
CPU trigger threshold configuration
triggersGpuMemoryThreshold Double
GPU memory trigger threshold configuration. Calculated by the DCGM_FI_DEV_MEM_COPY_UTIL metric
triggersGpuUtilizationThreshold Double
GPU utilization trigger threshold configuration. Calculated by the DCGM_FI_DEV_GPU_UTIL metric
triggersHttpRate Double
Request count per 'window' seconds for the http trigger. Required if you use http trigger
triggersHttpWindow Double
Time window for rate calculation in seconds. Required if you use http trigger
triggersMemoryThreshold Double
Memory trigger threshold configuration
triggersSqsActivationQueueLength Double
Number of messages for activation
triggersSqsAwsEndpoint String
Custom AWS endpoint; leave empty to use the default AWS endpoint
triggersSqsAwsRegion String
AWS region. Required if you use SQS trigger
triggersSqsQueueLength Double
Number of messages for one replica
triggersSqsQueueUrl String
URL of the SQS queue. Required if you use SQS trigger
triggersSqsScaleOnDelayed Boolean
Scale on delayed messages
triggersSqsScaleOnFlight Boolean
Scale on in-flight messages
triggersSqsSecretName String
Name of the secret with AWS credentials. Required if you use SQS trigger
cooldownPeriod This property is required. number
Cooldown period between scaling actions in seconds
regionId This property is required. number
Region id for the container
scaleMax This property is required. number
Maximum scale for the container
scaleMin This property is required. number
Minimum scale for the container. It can be set to 0, in which case the container will be downscaled to 0 when there is no load.
pollingInterval number
Polling interval for scaling triggers in seconds
readyContainers number
Status of the containers deployment. Number of ready instances
totalContainers number
Status of the containers deployment. Total number of instances
triggersCpuThreshold number
CPU trigger threshold configuration
triggersGpuMemoryThreshold number
GPU memory trigger threshold configuration. Calculated by the DCGM_FI_DEV_MEM_COPY_UTIL metric
triggersGpuUtilizationThreshold number
GPU utilization trigger threshold configuration. Calculated by the DCGM_FI_DEV_GPU_UTIL metric
triggersHttpRate number
Request count per 'window' seconds for the http trigger. Required if you use http trigger
triggersHttpWindow number
Time window for rate calculation in seconds. Required if you use http trigger
triggersMemoryThreshold number
Memory trigger threshold configuration
triggersSqsActivationQueueLength number
Number of messages for activation
triggersSqsAwsEndpoint string
Custom AWS endpoint; leave empty to use the default AWS endpoint
triggersSqsAwsRegion string
AWS region. Required if you use SQS trigger
triggersSqsQueueLength number
Number of messages for one replica
triggersSqsQueueUrl string
URL of the SQS queue. Required if you use SQS trigger
triggersSqsScaleOnDelayed boolean
Scale on delayed messages
triggersSqsScaleOnFlight boolean
Scale on in-flight messages
triggersSqsSecretName string
Name of the secret with AWS credentials. Required if you use SQS trigger
cooldown_period This property is required. float
Cooldown period between scaling actions in seconds
region_id This property is required. float
Region id for the container
scale_max This property is required. float
Maximum scale for the container
scale_min This property is required. float
Minimum scale for the container. It can be set to 0, in which case the container will be downscaled to 0 when there is no load.
polling_interval float
Polling interval for scaling triggers in seconds
ready_containers float
Status of the containers deployment. Number of ready instances
total_containers float
Status of the containers deployment. Total number of instances
triggers_cpu_threshold float
CPU trigger threshold configuration
triggers_gpu_memory_threshold float
GPU memory trigger threshold configuration. Calculated by the DCGM_FI_DEV_MEM_COPY_UTIL metric
triggers_gpu_utilization_threshold float
GPU utilization trigger threshold configuration. Calculated by the DCGM_FI_DEV_GPU_UTIL metric
triggers_http_rate float
Request count per 'window' seconds for the http trigger. Required if you use http trigger
triggers_http_window float
Time window for rate calculation in seconds. Required if you use http trigger
triggers_memory_threshold float
Memory trigger threshold configuration
triggers_sqs_activation_queue_length float
Number of messages for activation
triggers_sqs_aws_endpoint str
Custom AWS endpoint; leave empty to use the default AWS endpoint
triggers_sqs_aws_region str
AWS region. Required if you use SQS trigger
triggers_sqs_queue_length float
Number of messages for one replica
triggers_sqs_queue_url str
URL of the SQS queue. Required if you use SQS trigger
triggers_sqs_scale_on_delayed bool
Scale on delayed messages
triggers_sqs_scale_on_flight bool
Scale on in-flight messages
triggers_sqs_secret_name str
Name of the secret with AWS credentials. Required if you use SQS trigger
cooldownPeriod This property is required. Number
Cooldown period between scaling actions in seconds
regionId This property is required. Number
Region id for the container
scaleMax This property is required. Number
Maximum scale for the container
scaleMin This property is required. Number
Minimum scale for the container. It can be set to 0, in which case the container will be downscaled to 0 when there is no load.
pollingInterval Number
Polling interval for scaling triggers in seconds
readyContainers Number
Status of the containers deployment. Number of ready instances
totalContainers Number
Status of the containers deployment. Total number of instances
triggersCpuThreshold Number
CPU trigger threshold configuration
triggersGpuMemoryThreshold Number
GPU memory trigger threshold configuration. Calculated by the DCGM_FI_DEV_MEM_COPY_UTIL metric
triggersGpuUtilizationThreshold Number
GPU utilization trigger threshold configuration. Calculated by the DCGM_FI_DEV_GPU_UTIL metric
triggersHttpRate Number
Request count per 'window' seconds for the http trigger. Required if you use http trigger
triggersHttpWindow Number
Time window for rate calculation in seconds. Required if you use http trigger
triggersMemoryThreshold Number
Memory trigger threshold configuration
triggersSqsActivationQueueLength Number
Number of messages for activation
triggersSqsAwsEndpoint String
Custom AWS endpoint; leave empty to use the default AWS endpoint
triggersSqsAwsRegion String
AWS region. Required if you use SQS trigger
triggersSqsQueueLength Number
Number of messages for one replica
triggersSqsQueueUrl String
URL of the SQS queue. Required if you use SQS trigger
triggersSqsScaleOnDelayed Boolean
Scale on delayed messages
triggersSqsScaleOnFlight Boolean
Scale on in-flight messages
triggersSqsSecretName String
Name of the secret with AWS credentials. Required if you use SQS trigger

InferenceDeploymentLivenessProbe
, InferenceDeploymentLivenessProbeArgs

Enabled This property is required. bool
Enable or disable probe
ExecCommand string
Command to execute in the container to determine the health
FailureThreshold double
Number of failed probes before the container is considered unhealthy
HttpGetHeaders Dictionary<string, string>
HTTP headers to use when sending an HTTP GET request, valid only for HTTP probes
HttpGetHost string
Host name to connect to, valid only for HTTP probes
HttpGetPath string
Path to access on the HTTP server, valid only for HTTP probes
HttpGetPort double
Number of the port to access on the HTTP server, valid only for HTTP probes
HttpGetSchema string
Scheme to use for connecting to the host, valid only for HTTP probes
InitialDelaySeconds double
Number of seconds after the container has started before liveness probes are initiated
PeriodSeconds double
How often (in seconds) to perform the probe
SuccessThreshold double
Minimum consecutive successes for the probe to be considered successful after having failed
TcpSocketPort double
Port to connect to
TimeoutSeconds double
Number of seconds after which the probe times out
Enabled This property is required. bool
Enable or disable probe
ExecCommand string
Command to execute in the container to determine the health
FailureThreshold float64
Number of failed probes before the container is considered unhealthy
HttpGetHeaders map[string]string
HTTP headers to use when sending an HTTP GET request, valid only for HTTP probes
HttpGetHost string
Host name to connect to, valid only for HTTP probes
HttpGetPath string
Path to access on the HTTP server, valid only for HTTP probes
HttpGetPort float64
Number of the port to access on the HTTP server, valid only for HTTP probes
HttpGetSchema string
Scheme to use for connecting to the host, valid only for HTTP probes
InitialDelaySeconds float64
Number of seconds after the container has started before liveness probes are initiated
PeriodSeconds float64
How often (in seconds) to perform the probe
SuccessThreshold float64
Minimum consecutive successes for the probe to be considered successful after having failed
TcpSocketPort float64
Port to connect to
TimeoutSeconds float64
Number of seconds after which the probe times out
enabled This property is required. Boolean
Enable or disable probe
execCommand String
Command to execute in the container to determine the health
failureThreshold Double
Number of failed probes before the container is considered unhealthy
httpGetHeaders Map<String,String>
HTTP headers to use when sending an HTTP GET request, valid only for HTTP probes
httpGetHost String
Host name to connect to, valid only for HTTP probes
httpGetPath String
Path to access on the HTTP server, valid only for HTTP probes
httpGetPort Double
Number of the port to access on the HTTP server, valid only for HTTP probes
httpGetSchema String
Scheme to use for connecting to the host, valid only for HTTP probes
initialDelaySeconds Double
Number of seconds after the container has started before liveness probes are initiated
periodSeconds Double
How often (in seconds) to perform the probe
successThreshold Double
Minimum consecutive successes for the probe to be considered successful after having failed
tcpSocketPort Double
Port to connect to
timeoutSeconds Double
Number of seconds after which the probe times out
enabled This property is required. boolean
Enable or disable probe
execCommand string
Command to execute in the container to determine the health
failureThreshold number
Number of failed probes before the container is considered unhealthy
httpGetHeaders {[key: string]: string}
HTTP headers to use when sending an HTTP GET request, valid only for HTTP probes
httpGetHost string
Host name to connect to, valid only for HTTP probes
httpGetPath string
Path to access on the HTTP server, valid only for HTTP probes
httpGetPort number
Number of the port to access on the HTTP server, valid only for HTTP probes
httpGetSchema string
Scheme to use for connecting to the host, valid only for HTTP probes
initialDelaySeconds number
Number of seconds after the container has started before liveness probes are initiated
periodSeconds number
How often (in seconds) to perform the probe
successThreshold number
Minimum consecutive successes for the probe to be considered successful after having failed
tcpSocketPort number
Port to connect to
timeoutSeconds number
Number of seconds after which the probe times out
enabled This property is required. bool
Enable or disable probe
exec_command str
Command to execute in the container to determine the health
failure_threshold float
Number of failed probes before the container is considered unhealthy
http_get_headers Mapping[str, str]
HTTP headers to use when sending an HTTP GET request, valid only for HTTP probes
http_get_host str
Host name to connect to, valid only for HTTP probes
http_get_path str
Path to access on the HTTP server, valid only for HTTP probes
http_get_port float
Number of the port to access on the HTTP server, valid only for HTTP probes
http_get_schema str
Scheme to use for connecting to the host, valid only for HTTP probes
initial_delay_seconds float
Number of seconds after the container has started before liveness probes are initiated
period_seconds float
How often (in seconds) to perform the probe
success_threshold float
Minimum consecutive successes for the probe to be considered successful after having failed
tcp_socket_port float
Port to connect to
timeout_seconds float
Number of seconds after which the probe times out
enabled This property is required. Boolean
Enable or disable probe
execCommand String
Command to execute in the container to determine the health
failureThreshold Number
Number of failed probes before the container is considered unhealthy
httpGetHeaders Map<String>
HTTP headers to use when sending an HTTP GET request, valid only for HTTP probes
httpGetHost String
Host name to connect to, valid only for HTTP probes
httpGetPath String
Path to access on the HTTP server, valid only for HTTP probes
httpGetPort Number
Number of the port to access on the HTTP server, valid only for HTTP probes
httpGetSchema String
Scheme to use for connecting to the host, valid only for HTTP probes
initialDelaySeconds Number
Number of seconds after the container has started before liveness probes are initiated
periodSeconds Number
How often (in seconds) to perform the probe
successThreshold Number
Minimum consecutive successes for the probe to be considered successful after having failed
tcpSocketPort Number
Port to connect to
timeoutSeconds Number
Number of seconds after which the probe times out

InferenceDeploymentLogging
, InferenceDeploymentLoggingArgs

InferenceDeploymentReadinessProbe
, InferenceDeploymentReadinessProbeArgs

Enabled This property is required. bool
Enable or disable probe
ExecCommand string
Command to execute in the container to determine the health
FailureThreshold double
Number of failed probes before the container is considered unhealthy
HttpGetHeaders Dictionary<string, string>
HTTP headers to use when sending an HTTP GET request, valid only for HTTP probes
HttpGetHost string
Host name to connect to, valid only for HTTP probes
HttpGetPath string
Path to access on the HTTP server, valid only for HTTP probes
HttpGetPort double
Number of the port to access on the HTTP server, valid only for HTTP probes
HttpGetSchema string
Scheme to use for connecting to the host, valid only for HTTP probes
InitialDelaySeconds double
Number of seconds after the container has started before readiness probes are initiated
PeriodSeconds double
How often (in seconds) to perform the probe
SuccessThreshold double
Minimum consecutive successes for the probe to be considered successful after having failed
TcpSocketPort double
Port to connect to
TimeoutSeconds double
Number of seconds after which the probe times out
Enabled This property is required. bool
Enable or disable probe
ExecCommand string
Command to execute in the container to determine the health
FailureThreshold float64
Number of failed probes before the container is considered unhealthy
HttpGetHeaders map[string]string
HTTP headers to use when sending an HTTP GET request, valid only for HTTP probes
HttpGetHost string
Host name to connect to, valid only for HTTP probes
HttpGetPath string
Path to access on the HTTP server, valid only for HTTP probes
HttpGetPort float64
Number of the port to access on the HTTP server, valid only for HTTP probes
HttpGetSchema string
Scheme to use for connecting to the host, valid only for HTTP probes
InitialDelaySeconds float64
Number of seconds after the container has started before readiness probes are initiated
PeriodSeconds float64
How often (in seconds) to perform the probe
SuccessThreshold float64
Minimum consecutive successes for the probe to be considered successful after having failed
TcpSocketPort float64
Port to connect to
TimeoutSeconds float64
Number of seconds after which the probe times out
enabled This property is required. Boolean
Enable or disable probe
execCommand String
Command to execute in the container to determine the health
failureThreshold Double
Number of failed probes before the container is considered unhealthy
httpGetHeaders Map<String,String>
HTTP headers to use when sending an HTTP GET request, valid only for HTTP probes
httpGetHost String
Host name to connect to, valid only for HTTP probes
httpGetPath String
Path to access on the HTTP server, valid only for HTTP probes
httpGetPort Double
Number of the port to access on the HTTP server, valid only for HTTP probes
httpGetSchema String
Scheme to use for connecting to the host, valid only for HTTP probes
initialDelaySeconds Double
Number of seconds after the container has started before readiness probes are initiated
periodSeconds Double
How often (in seconds) to perform the probe
successThreshold Double
Minimum consecutive successes for the probe to be considered successful after having failed
tcpSocketPort Double
Port to connect to
timeoutSeconds Double
Number of seconds after which the probe times out
enabled This property is required. boolean
Enable or disable probe
execCommand string
Command to execute in the container to determine the health
failureThreshold number
Number of failed probes before the container is considered unhealthy
httpGetHeaders {[key: string]: string}
HTTP headers to use when sending an HTTP GET request, valid only for HTTP probes
httpGetHost string
Host name to connect to, valid only for HTTP probes
httpGetPath string
Path to access on the HTTP server, valid only for HTTP probes
httpGetPort number
Number of the port to access on the HTTP server, valid only for HTTP probes
httpGetSchema string
Scheme to use for connecting to the host, valid only for HTTP probes
initialDelaySeconds number
Number of seconds after the container has started before readiness probes are initiated
periodSeconds number
How often (in seconds) to perform the probe
successThreshold number
Minimum consecutive successes for the probe to be considered successful after having failed
tcpSocketPort number
Port to connect to
timeoutSeconds number
Number of seconds after which the probe times out
enabled This property is required. bool
Enable or disable probe
exec_command str
Command to execute in the container to determine the health
failure_threshold float
Number of failed probes before the container is considered unhealthy
http_get_headers Mapping[str, str]
HTTP headers to use when sending an HTTP GET request, valid only for HTTP probes
http_get_host str
Host name to connect to, valid only for HTTP probes
http_get_path str
Path to access on the HTTP server, valid only for HTTP probes
http_get_port float
Number of the port to access on the HTTP server, valid only for HTTP probes
http_get_schema str
Scheme to use for connecting to the host, valid only for HTTP probes
initial_delay_seconds float
Number of seconds after the container has started before readiness probes are initiated
period_seconds float
How often (in seconds) to perform the probe
success_threshold float
Minimum consecutive successes for the probe to be considered successful after having failed
tcp_socket_port float
Port to connect to
timeout_seconds float
Number of seconds after which the probe times out
enabled This property is required. Boolean
Enable or disable probe
execCommand String
Command to execute in the container to determine the health
failureThreshold Number
Number of failed probes before the container is considered unhealthy
httpGetHeaders Map<String>
HTTP headers to use when sending an HTTP GET request, valid only for HTTP probes
httpGetHost String
Host name to connect to, valid only for HTTP probes
httpGetPath String
Path to access on the HTTP server, valid only for HTTP probes
httpGetPort Number
Number of the port to access on the HTTP server, valid only for HTTP probes
httpGetSchema String
Scheme to use for connecting to the host, valid only for HTTP probes
initialDelaySeconds Number
Number of seconds after the container has started before readiness probes are initiated
periodSeconds Number
How often (in seconds) to perform the probe
successThreshold Number
Minimum consecutive successes for the probe to be considered successful after having failed
tcpSocketPort Number
Port to connect to
timeoutSeconds Number
Number of seconds after which the probe times out

InferenceDeploymentStartupProbe
, InferenceDeploymentStartupProbeArgs

Enabled This property is required. bool
Enable or disable probe
ExecCommand string
Command to execute in the container to determine the health
FailureThreshold double
Number of failed probes before the container is considered unhealthy
HttpGetHeaders Dictionary<string, string>
HTTP headers to use when sending an HTTP GET request, valid only for HTTP probes
HttpGetHost string
Host name to connect to, valid only for HTTP probes
HttpGetPath string
Path to access on the HTTP server, valid only for HTTP probes
HttpGetPort double
Number of the port to access on the HTTP server, valid only for HTTP probes
HttpGetSchema string
Scheme to use for connecting to the host, valid only for HTTP probes
InitialDelaySeconds double
Number of seconds after the container has started before startup probes are initiated
PeriodSeconds double
How often (in seconds) to perform the probe
SuccessThreshold double
Minimum consecutive successes for the probe to be considered successful after having failed
TcpSocketPort double
Port to connect to
TimeoutSeconds double
Number of seconds after which the probe times out
Enabled This property is required. bool
Enable or disable probe
ExecCommand string
Command to execute in the container to determine the health
FailureThreshold float64
Number of failed probes before the container is considered unhealthy
HttpGetHeaders map[string]string
HTTP headers to use when sending an HTTP GET request, valid only for HTTP probes
HttpGetHost string
Host name to connect to, valid only for HTTP probes
HttpGetPath string
Path to access on the HTTP server, valid only for HTTP probes
HttpGetPort float64
Number of the port to access on the HTTP server, valid only for HTTP probes
HttpGetSchema string
Scheme to use for connecting to the host, valid only for HTTP probes
InitialDelaySeconds float64
Number of seconds after the container has started before startup probes are initiated
PeriodSeconds float64
How often (in seconds) to perform the probe
SuccessThreshold float64
Minimum consecutive successes for the probe to be considered successful after having failed
TcpSocketPort float64
Port to connect to
TimeoutSeconds float64
Number of seconds after which the probe times out
enabled This property is required. Boolean
Enable or disable probe
execCommand String
Command to execute in the container to determine the health
failureThreshold Double
Number of failed probes before the container is considered unhealthy
httpGetHeaders Map<String,String>
HTTP headers to use when sending an HTTP GET request, valid only for HTTP probes
httpGetHost String
Host name to connect to, valid only for HTTP probes
httpGetPath String
Path to access on the HTTP server, valid only for HTTP probes
httpGetPort Double
Number of the port to access on the HTTP server, valid only for HTTP probes
httpGetSchema String
Scheme to use for connecting to the host, valid only for HTTP probes
initialDelaySeconds Double
Number of seconds after the container has started before startup probes are initiated
periodSeconds Double
How often (in seconds) to perform the probe
successThreshold Double
Minimum consecutive successes for the probe to be considered successful after having failed
tcpSocketPort Double
Port to connect to
timeoutSeconds Double
Number of seconds after which the probe times out
enabled This property is required. boolean
Enable or disable probe
execCommand string
Command to execute in the container to determine the health
failureThreshold number
Number of failed probes before the container is considered unhealthy
httpGetHeaders {[key: string]: string}
HTTP headers to use when sending an HTTP GET request, valid only for HTTP probes
httpGetHost string
Host name to connect to, valid only for HTTP probes
httpGetPath string
Path to access on the HTTP server, valid only for HTTP probes
httpGetPort number
Number of the port to access on the HTTP server, valid only for HTTP probes
httpGetSchema string
Scheme to use for connecting to the host, valid only for HTTP probes
initialDelaySeconds number
Number of seconds after the container has started before startup probes are initiated
periodSeconds number
How often (in seconds) to perform the probe
successThreshold number
Minimum consecutive successes for the probe to be considered successful after having failed
tcpSocketPort number
Port to connect to
timeoutSeconds number
Number of seconds after which the probe times out
enabled This property is required. bool
Enable or disable probe
exec_command str
Command to execute in the container to determine the health
failure_threshold float
Number of failed probes before the container is considered unhealthy
http_get_headers Mapping[str, str]
HTTP headers to use when sending an HTTP GET request, valid only for HTTP probes
http_get_host str
Host name to connect to, valid only for HTTP probes
http_get_path str
Path to access on the HTTP server, valid only for HTTP probes
http_get_port float
Number of the port to access on the HTTP server, valid only for HTTP probes
http_get_schema str
Scheme to use for connecting to the host, valid only for HTTP probes
initial_delay_seconds float
Number of seconds after the container has started before startup probes are initiated
period_seconds float
How often (in seconds) to perform the probe
success_threshold float
Minimum consecutive successes for the probe to be considered successful after having failed
tcp_socket_port float
Port to connect to
timeout_seconds float
Number of seconds after which the probe times out
enabled This property is required. Boolean
Enable or disable probe
execCommand String
Command to execute in the container to determine the health
failureThreshold Number
Number of failed probes before the container is considered unhealthy
httpGetHeaders Map<String>
HTTP headers to use when sending an HTTP GET request, valid only for HTTP probes
httpGetHost String
Host name to connect to, valid only for HTTP probes
httpGetPath String
Path to access on the HTTP server, valid only for HTTP probes
httpGetPort Number
Number of the port to access on the HTTP server, valid only for HTTP probes
httpGetSchema String
Scheme to use for connecting to the host, valid only for HTTP probes
initialDelaySeconds Number
Number of seconds after the container has started before startup probes are initiated
periodSeconds Number
How often (in seconds) to perform the probe
successThreshold Number
Minimum consecutive successes for the probe to be considered successful after having failed
tcpSocketPort Number
Port to connect to
timeoutSeconds Number
Number of seconds after which the probe times out

Import

Import an existing inference deployment using the `<project_id>:<inference_deployment_name>` format

$ pulumi import gcore:index/inferenceDeployment:InferenceDeployment inf1 1:my-first-inference
Copy

To learn more about importing existing cloud resources, see Importing resources.

Package Details

Repository
gcore g-core/terraform-provider-gcore
License
Notes
This Pulumi package is based on the gcore Terraform Provider.