Nomad HTTP health check does not seem to occur - nomad

It seems the HTTP health check is not occurring; I've come to this conclusion because the HTTP debug log does not show any regular periodic requests.
Is there any additional configuration required for a health check to occur?
job "example" {
datacenters = ["dc1"]
type = "service"
update {
max_parallel = 1
min_healthy_time = "10s"
healthy_deadline = "3m"
progress_deadline = "10m"
auto_revert = false
canary = 0
}
migrate {
max_parallel = 1
health_check = "checks"
min_healthy_time = "10s"
healthy_deadline = "5m"
}
group "app" {
count = 1
restart {
attempts = 2
interval = "30m"
delay = "15s"
mode = "fail"
}
ephemeral_disk {
size = 300
}
task "app" {
driver = "docker"
config {
image = "localhost:5000/myhub:latest"
command = "python"
args = [
"manage.py",
"runserver",
"0.0.0.0:8001"
]
port_map {
app = 8001
}
network_mode = "host"
}
resources {
cpu = 500
memory = 256
network {
mbits = 10
port "app" {}
}
}
service {
name = "myhub"
port = "app"
check {
name = "alive"
type = "http"
port = "app"
path = "/"
interval = "10s"
timeout = "3s"
}
}
}
}
}

It turns out Consul must be installed and running for the check to be registered and executed.
Also make sure to install Consul v1.4.2 or later, as v1.4.1 seems to have a bug: https://github.com/hashicorp/consul/issues/5270
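For reference, here is a minimal sketch of the Nomad agent configuration that points Nomad at a local Consul agent (the address shown is Consul's default and is an assumption about your setup). Nomad only registers the service and its check with Consul; it is Consul that actually performs the periodic HTTP requests, which is why nothing shows up without it:
# Nomad agent configuration sketch -- assumes a Consul agent is running
# locally on its default HTTP port (8500).
consul {
  address = "127.0.0.1:8500"
}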

Related

HashiCorp Nomad: why is my app not connecting to the database? (Postgres, Golang API, provider = "nomad")

Help required: I have tried many approaches, but unfortunately the app is not connecting to the database.
DB_CONN is the environment variable the app expects.
The service provider is Nomad, so it should be simple service discovery.
I created two groups in the same job file; in fact I tried it differently as well, but it did not work.
variable "datacenters" {
description = "A list of datacenters in the region which are eligible for task placement."
type = list(string)
default = ["dc1"]
}
variable "region" {
description = "The region where the job should be placed."
type = string
default = "global"
}
variable "postgres_db" {
description = "Postgres DB name"
default = "vehicle_master_bd"
}
variable "postgres_user" {
description = "Postgres DB User"
default = "postgres"
}
variable "postgres_password" {
description = "Postgres DB Password"
default = "postgres"
}
# Begin Job Spec
job "cres-vehicleMaster" {
  type = "service"
  region = var.region
  datacenters = var.datacenters
  group "db" {
    network {
      port "db" {
        to = 5432
      }
    }
    task "postgres" {
      driver = "docker"
      meta {
        service = "database"
      }
      service {
        name = "database"
        port = "db"
        provider = "nomad"
      }
      config {
        image = "postgres"
        ports = ["db"]
      }
      resources {
        cpu = 500
        memory = 500
      }
      env {
        POSTGRES_DB = var.postgres_db
        POSTGRES_USER = var.postgres_user
        POSTGRES_PASSWORD = var.postgres_password
      }
    }
  }
  group "vehicleMaster-api" {
    count = 1
    network {
      port "api" {
        to = 50080
      }
    }
    restart {
      attempts = 2
      interval = "30m"
      delay = "15s"
      mode = "fail"
    }
    task "vehicleMaster-api" {
      driver = "docker"
      service {
        name = "vehicleMaster-api"
        tags = ["vehicleMaster", "RESTAPI"]
        port = "api"
        provider = "nomad"
      }
      template {
        data = <<EOH
{{ range nomadService "database" }}
DB_CONN="host={{ .Address }} port={{ .Port }} user=${var.postgres_user} password=${var.postgres_password} dbname=${var.postgres_db} sslmode=disable"
{{ end }}
EOH
        destination = "local/env.txt"
        env = true
      }
      env {
        // DB_CONN = "host=127.0.0.1 port=5432 user=postgres password=postgres dbname=vehicle_master_bd sslmode=disable"
        // DB_CONN = "host=${NOMAD_IP_postgres} port=${NOMAD_PORT_postgres} user=${var.postgres_user} password=${var.postgres_password} dbname=${var.postgres_db} sslmode=disable"
        PORT = "50080"
      }
      config {
        image = "jpalaparthi/vehiclemaster:v0.0.3"
        ports = ["api"]
      }
      resources {
        cpu = 500    # 500 MHz
        memory = 512 # 512 MB
      }
    }
  }
}
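As a debugging aid (a sketch only, not necessarily the fix), you can temporarily pin the database's host port so the rendered DB_CONN is predictable and easy to test by hand; this assumes port 5432 is free on the client node:
# Temporary variant of the db group's network block, for debugging only.
# With a static port, the "database" service is always registered on 5432.
network {
  port "db" {
    static = 5432
    to     = 5432
  }
}
If the app connects with the pinned port but not with the dynamic one, the problem is in how DB_CONN is rendered rather than in the database itself.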

Connect DB and backend using service name

I am trying to use the service name to connect my Nomad PostgreSQL job and the app.
Using the agent node (where the job is running) works fine, but using my job's .service.consul name does not.
As described in application.properties, using the IP or hostname works, but the service name doesn't.
Can you help me?
Application.properties
#Enable ddl
spring.jpa.hibernate.ddl-auto=none
##Works
#spring.datasource.url=jdbc:postgresql://10.3.52.121:5432/postgres
#spring.datasource.url=jdbc:postgresql://vmc####26.dev.##.##.##.##.##:5432/postgres
##DON'T WORK WITH SERVICE NAME
#spring.datasource.url=jdbc:postgresql://pgsql-##.service.consul:5432/postgres
spring.datasource.url=jdbc:postgresql://${DB_SERVICE_NAME}:5432/postgres
spring.datasource.username=${POSTGRES_USER}
spring.datasource.password=${POSTGRES_PASSWORD}
spring.jpa.properties.hibernate.dialect= org.hibernate.dialect.PostgreSQLDialect
spring.jpa.hibernate.naming.physical-strategy=org.hibernate.boot.model.naming.PhysicalNamingStrategyStandardImpl
Postgresql job
job "pgsql-qa" {
datacenters = ["###"]
type = "service"
vault {
policies = ["###"]
change_mode = "noop"
}
group "pgsql-qa" {
count = 1
task "pgsql-qa" {
driver = "docker"
config {
image = "postgres"
volumes = [
"name=####pgsqldb,io_priority=high,size=5,repl=1:/var/lib/postgresql/data"
]
volume_driver = "pxd"
network_mode = "bridge"
port_map {
db = 5432
}
}
template {
data = <<EOH
POSTGRES_USER="{{ with secret "app/###/###/db/admin" }}{{ .Data.data.user }}{{end}}"
POSTGRES_PASSWORD="{{ with secret "app/###/###/db/admin" }}{{ .Data.data.password }}{{end}}"
EOH
destination = "secrets/db"
env = true
}
logs {
max_files = 5
max_file_size = 15
}
resources {
cpu = 1000
memory = 1024
network {
mbits = 10
port "db" {
static = 5432
}
}
}
service {
name = "pgsql-qa"
tags = ["urlprefix-pgsqlqadb proto=tcp"]
port = "db"
check {
name = "alive"
type = "tcp"
interval = "10s"
timeout = "2s"
port = "db"
}
}
}
restart {
attempts = 10
interval = "5m"
delay = "25s"
mode = "delay"
}
}
update {
max_parallel = 1
min_healthy_time = "5s"
healthy_deadline = "3m"
auto_revert = false
canary = 0
}
}
I found the solution. Just add this line in the template {} section to get the IP and the random port created by Nomad.
Replace db_service_name with your service name:
MY_DB = "{{ range service "${db_service_name}" }}{{ .Address }}:{{ .Port }}{{ end }}"
Thanks!
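For context, here is a sketch of what that looks like as a complete template stanza in the consuming task; the service name "pgsql-qa" is taken from the job above, and the destination path is illustrative only:
# Template sketch: Consul's catalog is queried at render time, so MY_DB is
# populated with the service's current address and port and exported as an
# environment variable for the task.
template {
  data = <<EOH
MY_DB = "{{ range service "pgsql-qa" }}{{ .Address }}:{{ .Port }}{{ end }}"
EOH
  destination = "local/db_addr.env"
  env = true
}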

Finding Proxmox IP address with Terraform and Ansible

I have this code:
terraform {
  required_providers {
    proxmox = {
      source = "telmate/proxmox"
      version = "2.8.0"
    }
  }
}
provider "proxmox" {
  pm_api_url = "https://url/api2/json"
  pm_user = "user"
  pm_password = "pass"
  pm_tls_insecure = true
}
resource "proxmox_vm_qemu" "test" {
  count = 1
  name = "test-${count.index + 1}"
  target_node = "prm01"
  clone = "image-here"
  guest_agent_ready_timeout = 60
  os_type = "cloud-init"
  cores = 2
  sockets = 1
  cpu = "host"
  memory = 4048
  scsihw = "virtio-scsi-pci"
  bootdisk = "scsi0"
  disk {
    slot = 0
    size = "32G"
    type = "scsi"
    storage = "local-lvm"
    iothread = 1
  }
  network {
    model = "virtio"
    bridge = "vmbr0"
  }
  lifecycle {
    ignore_changes = [
      network,
    ]
  }
}
output "proxmox_ip_address_default" {
  description = "Current IP Default"
  value = proxmox_vm_qemu.test.*.default_ipv4_address
}
This is created via an Ansible playbook. What I'm trying to find is the IP assigned to the machine, as I then run another playbook to provision that machine. The problem is that I haven't found any way to get the assigned IP address of that machine.
The output is empty!
Any help?
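One thing worth checking (a sketch only, under the assumption that the cloned image has qemu-guest-agent installed and started): with the Telmate provider, default_ipv4_address is read through the QEMU guest agent, so the agent also has to be enabled on the resource:
resource "proxmox_vm_qemu" "test" {
  count       = 1
  name        = "test-${count.index + 1}"
  target_node = "prm01"
  clone       = "image-here"
  agent       = 1   # enable the QEMU guest agent so the provider can read the VM's IP
  # ... remaining arguments as in the resource above ...
}
output "proxmox_ip_address_default" {
  description = "Current IP Default"
  value       = proxmox_vm_qemu.test.*.default_ipv4_address
}
With the agent reporting, the output should then contain the guest's IPv4 address instead of being empty.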

Terraform - Azure Windows VM winrm connection issue

I want to create a Windows Azure VM, copy some files, and run some simple commands on that VM using a Terraform script.
The problem is: I am able to create the VM but not able to connect via WinRM.
provider "azurerm" {
subscription_id = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
tenant_id = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
}
resource "azurerm_virtual_network" "vnet" {
name = "cmTFVnet"
address_space = ["10.0.0.0/16"]
location = "South India"
resource_group_name = "cservice"
}
resource "azurerm_subnet" "subnet" {
name = "cmTFSubnet"
resource_group_name = "cservice"
virtual_network_name = "${azurerm_virtual_network.vnet.name}"
address_prefix = "10.0.2.0/24"
}
resource "azurerm_public_ip" "publicip" {
name = "cmTFPublicIP"
location = "South India"
resource_group_name = "cservice"
public_ip_address_allocation = "dynamic"
}
resource "azurerm_network_security_group" "nsg" {
name = "cmTFNSG"
location = "South India"
resource_group_name = "cservice"
security_rule {
name = "SSH"
priority = 340
direction = "Inbound"
access = "Allow"
protocol = "Tcp"
source_port_range = "*"
destination_port_range = "22"
source_address_prefix = "*"
destination_address_prefix = "*"
}
security_rule {
name = "winrm"
priority = 1010
direction = "Inbound"
access = "Allow"
protocol = "Tcp"
source_port_range = "*"
destination_port_range = "5985"
source_address_prefix = "*"
destination_address_prefix = "*"
}
security_rule {
name = "winrm-out"
priority = 100
direction = "Outbound"
access = "Allow"
protocol = "*"
source_port_range = "*"
destination_port_range = "5985"
source_address_prefix = "*"
destination_address_prefix = "*"
}
}
resource "azurerm_network_interface" "nic" {
name = "cmNIC"
location = "South India"
resource_group_name = "cservice"
network_security_group_id = "${azurerm_network_security_group.nsg.id}"
ip_configuration {
name = "compilerNICConfg"
subnet_id = "${azurerm_subnet.subnet.id}"
private_ip_address_allocation = "dynamic"
public_ip_address_id = "${azurerm_public_ip.publicip.id}"
}
}
resource "azurerm_virtual_machine" "vm" {
name = "cmTFVM"
location = "South India"
resource_group_name = "cservice"
network_interface_ids = ["${azurerm_network_interface.nic.id}"]
vm_size = "Standard_D2s_v3"
storage_image_reference
{
id = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
}
storage_os_disk {
name = "cmOsDisk"
managed_disk_type = "Premium_LRS"
create_option = "FromImage"
}
os_profile {
computer_name = "hostname"
admin_username = "test"
admin_password = "test#123"
}
os_profile_windows_config {
enable_automatic_upgrades = "true"
provision_vm_agent ="true"
winrm = {
protocol = "http"
}
}
provisioner "remote-exec" {
connection = {
type = "winrm"
user = "test"
password = "test#123"
agent = "false"
https = false
insecure = true
}
inline = [
"cd..",
"cd..",
"cd docker",
"mkdir test"
]
}
}
The VM is created successfully, but I am not able to connect via WinRM.
I am getting the following error from "remote-exec":
azurerm_virtual_machine.vm: timeout - last error: unknown error Post
http://:5985/wsman: dial tcp :5985: connectex: A connection attempt
failed because the connected party did not properly respond after a
period of time, or established connection failed because connected
host has failed to respond.
or http response error: 401 - invalid content type
When you create a Windows Azure VM, WinRM is not configured by default. So if you want to connect to the VM through WinRM, you need to configure WinRM either after the VM is created or at creation time.
You can follow the steps in Configure WinRM after virtual machine creation. You can also configure it at creation time; there is an example that shows this through an Azure template, which should also help: see Deploys a Windows VM and Configures a WinRM Https listener.
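Separately, the error above shows an empty host ("http://:5985/wsman"), which suggests the provisioner's connection block is never told which address to reach. A sketch of the connection with an explicit host (this assumes you connect over the public IP, which with dynamic allocation is only known once the VM is running):
provisioner "remote-exec" {
  connection {
    type     = "winrm"
    host     = "${azurerm_public_ip.publicip.ip_address}"   # explicit host for the WinRM endpoint
    user     = "test"
    password = "test#123"
    https    = false
    insecure = true
  }
  inline = [
    "cd..",
    "cd..",
    "cd docker",
    "mkdir test"
  ]
}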

akka.cluster with double asp.net webapi on IIS

In our cluster we have five nodes, composed of:
2 seed nodes (backend)
1 worker
2 Web API nodes on IIS
The cluster is joined, up, and running; but as soon as the second IIS node sends its first message to the cluster via the router, the whole cluster becomes unreachable and dissociated.
In addition, the second IIS node can't deliver any messages.
Here is my IIS config:
<hocon>
<![CDATA[
akka.loglevel = INFO
akka.log-config-on-start = off
akka.stdout-loglevel = INFO
akka.actor {
  provider = "Akka.Cluster.ClusterActorRefProvider, Akka.Cluster"
  deployment {
    /Process {
      router = round-robin-group
      routees.paths = ["/user/Process"] # path of routee on each node
      # nr-of-instances = 3 # max number of total routees
      cluster {
        enabled = on
        allow-local-routees = off
        use-role = Process
      }
    }
  }
  debug {
    receive = on
    autoreceive = on
    lifecycle = on
    event-stream = on
    unhandled = on
  }
}
akka.remote {
  helios.tcp {
    # transport-class = "Akka.Remote.Transport.Helios.HeliosTcpTransport, Akka.Remote"
    # applied-adapters = []
    # transport-protocol = tcp
    port = 0
    hostname = 172.16.1.8
  }
  log-remote-lifecycle-events = DEBUG
}
akka.cluster {
  seed-nodes = [
    "akka.tcp://ClusterActorSystem#172.16.1.8:2551",
    "akka.tcp://ClusterActorSystem#172.16.1.8:2552"
  ]
  roles = [Send]
  auto-down-unreachable-after = 10s
  # how often should the node send out gossip information?
  gossip-interval = 1s
  # discard incoming gossip messages if not handled within this duration
  gossip-time-to-live = 2s
}
# http://getakka.net/docs/persistence/at-least-once-delivery
akka.persistence.at-least-once-delivery.redeliver-interval = 300s
# akka.persistence.at-least-once-delivery.redelivery-burst-limit =
# akka.persistence.at-least-once-delivery.warn-after-number-of-unconfirmed-attempts =
akka.persistence.at-least-once-delivery.max-unconfirmed-messages = 1000000
akka.persistence.journal.plugin = "akka.persistence.journal.sql-server"
akka.persistence.journal.publish-plugin-commands = on
akka.persistence.journal.sql-server {
  class = "Akka.Persistence.SqlServer.Journal.SqlServerJournal, Akka.Persistence.SqlServer"
  plugin-dispatcher = "akka.actor.default-dispatcher"
  table-name = EventJournal
  schema-name = dbo
  auto-initialize = on
  connection-string-name = "HubAkkaPersistence"
  refresh-interval = 1s
  connection-timeout = 30s
  timestamp-provider = "Akka.Persistence.Sql.Common.Journal.DefaultTimestampProvider, Akka.Persistence.Sql.Common"
  metadata-table-name = Metadata
}
akka.persistence.snapshot-store.plugin = "akka.persistence.snapshot-store.sql-server"
akka.persistence.snapshot-store.sql-server {
  class = "Akka.Persistence.SqlServer.Snapshot.SqlServerSnapshotStore, Akka.Persistence.SqlServer"
  plugin-dispatcher = "akka.actor.default-dispatcher"
  connection-string-name = "HubAkkaPersistence"
  schema-name = dbo
  table-name = SnapshotStore
  auto-initialize = on
}
]]>
</hocon>
Inside Global.asax we create a new router to the cluster:
ClusterActorSystem = ActorSystem.Create("ClusterActorSystem");
var backendRouter =
    ClusterActorSystem.ActorOf(
        Props.Empty.WithRouter(FromConfig.Instance), "Process");
Send = ClusterActorSystem.ActorOf(
    Props.Create(() => new Common.Actors.Send(backendRouter)),
    "Send");
and here is our backend config:
<hocon><![CDATA[
akka.loglevel = INFO
akka.log-config-on-start = on
akka.stdout-loglevel = INFO
akka.actor {
  provider = "Akka.Cluster.ClusterActorRefProvider, Akka.Cluster"
  debug {
    receive = on
    autoreceive = on
    lifecycle = on
    event-stream = on
    unhandled = on
  }
}
akka.remote {
  helios.tcp {
    # transport-class = "Akka.Remote.Transport.Helios.HeliosTcpTransport, Akka.Remote"
    # applied-adapters = []
    # transport-protocol = tcp
    #
    # seed-node ports 2551 and 2552
    # non-seed-node port 0
    port = 2551
    hostname = 172.16.1.8
  }
  log-remote-lifecycle-events = INFO
}
akka.cluster {
  seed-nodes = [
    "akka.tcp://ClusterActorSystem#172.16.1.8:2551",
    "akka.tcp://ClusterActorSystem#172.16.1.8:2552"
  ]
  roles = [Process]
  auto-down-unreachable-after = 10s
}
]]></hocon>
The issue is present using both Akka 1.1 and Akka 1.2.
UPDATE
I have found that the issue is related to our load balancer (NetScaler): if I call each IIS node directly, everything works fine. If called through the balancer, I hit the reported issue; the balancer is transparent (it only adds some headers to the request). What can I check to solve this issue?
Finally I found the problem: we are using akka.persistence, which requires a distinct PersistenceId value for each IIS node.
