Python multiple replacement with matching - python-2.x

Work in Progress Json2yaml converter for Ansible (http://docs.ansible.com/ansible/ec2_group_module.html) creation
Link to Project:
https://github.com/shdobxr/json2yamlAnsible/blob/master/json2yaml.py
Answered! See above for project.
I have been researching this on Stack Overflow, and it's just not clicking.
Python replace multiple strings
Here is my attempt, please be kind I have dabbled with python in the past. The rust is showing here:
#!/usr/local/bin/python
"""Convert AWS ``describe-security-groups`` JSON output into Ansible
``ec2_group``-style YAML (work in progress).

Usage: json2yaml.py <input.json>

Writes ``<input.json>.yml`` (raw YAML dump of the JSON) and ``out.txt``
(the same YAML with AWS key names rewritten to Ansible parameter names).
"""
import re
import sys
import json

# Mapping of AWS-style YAML fragments to their Ansible equivalents.
# A value of None marks a line-removal rule: any line containing that
# key is dropped from the output entirely.  (The original code used ''
# for these, which made the removal regex an empty alternation that
# matched EVERY line, so out.txt came out empty.)
# NOTE(review): some rules can shadow each other — e.g. a line holding
# ' - GroupId:' also contains ' GroupId:' and is removed before the
# ' group_id:' substitution can fire; the exact leading-space counts
# should be confirmed against real safe_dump output.
replacements = {
    ' ToPort:': ' to_port:',
    ' - FromPort:': ' from_port:',
    'UserIdGroupPairs:': None,
    'Vpc:': None,
    'VpcPeeringConnectionId:': None,
    'PrefixListIds: []': None,
    '- Description:': ' description:',
    ' GroupName:': ' - name:',
    ' - IpProtocol:': ' - proto:',
    'SecurityGroups:': '- name:',
    ' IpPermissions:': ' rules:',
    ' IpPermissionsEgress:': ' rules_egress:',
    ' GroupId:': None,
    ' - GroupId:': ' group_id:',
}
# Escape the keys so the regex engine matches them literally.
replacements = {re.escape(k): v for k, v in replacements.items()}
# One alternation for lines to delete, one for in-line substitutions.
remove_pattern = re.compile('|'.join(k for k, v in replacements.items() if v is None))
replace_pattern = re.compile('|'.join(k for k, v in replacements.items() if v is not None))


def rewrite(text):
    """Return *text* transformed by the rules above.

    Returns '' when the line matches a removal rule; otherwise returns
    the line with every replacement rule applied.
    """
    if remove_pattern.search(text):
        return ''
    return replace_pattern.sub(lambda m: replacements[re.escape(m.group())], text)


def main():
    # PyYAML is a third-party dependency; import it lazily so rewrite()
    # stays importable/testable without it installed.
    import yaml
    with open(sys.argv[1]) as f:
        dumped = yaml.safe_dump(json.load(f), default_flow_style=False)
    yml_path = sys.argv[1] + ".yml"
    # The original used open()/`text_file.close` and forgot the call
    # parentheses, so the file was never explicitly closed; a with-block
    # guarantees the data is flushed before we re-read it below.
    with open(yml_path, "w") as text_file:
        text_file.write(dumped)
    with open(yml_path, 'rt') as fin, open('out.txt', 'wt') as fout:
        for line in fin:
            fout.write(rewrite(line))


if __name__ == "__main__":
    main()
Test file (save this as test.json)
{
"SecurityGroups": [
{
"IpPermissionsEgress": [
{
"PrefixListIds": [],
"FromPort": 22,
"IpRanges": [
{
"CidrIp": "10.0.0.0/24"
}
],
"ToPort": 22,
"IpProtocol": "tcp",
"UserIdGroupPairs": [
{
"UserId": "XXXXXXXXXXXX",
"GroupId": "sg-xxxxxxxx"
}
]
},
{
"PrefixListIds": [],
"FromPort": 3389,
"IpRanges": [
{
"CidrIp": "10.0.0.0/24"
}
],
"ToPort": 3389,
"IpProtocol": "tcp",
"UserIdGroupPairs": [
{
"UserId": "XXXXXXXXXXXX",
"GroupId": "sg-xxxxxxxx"
}
]
}
],
"Description": "TEST JSON",
"Tags": [
{
"Value": "Test JSON",
"Key": "Name"
}
],
"IpPermissions": [
{
"PrefixListIds": [],
"FromPort": 22,
"IpRanges": [
{
"CidrIp": "10.0.0.0/24"
}
],
"ToPort": 22,
"IpProtocol": "tcp",
"UserIdGroupPairs": [
{
"UserId": "XXXXXXXXXXXX",
"GroupId": "sg-xxxxxxxx"
}
]
},
{
"PrefixListIds": [],
"FromPort": 3389,
"IpRanges": [
{
"CidrIp": "10.0.0.0/24"
}
],
"ToPort": 3389,
"IpProtocol": "tcp",
"UserIdGroupPairs": [
{
"UserId": "XXXXXXXXXXXX",
"GroupId": "sg-xxxxxxxx"
}
]
}
],
"GroupName": "Test JSON",
"VpcId": "vpc-XXXXXXXX",
"OwnerId": "XXXXXXXXXXXX",
"GroupId": "sg-xxxxxxxx"
}
]
}

The answer you cited adapted for your use case:
"""Line rewriter: delete lines containing some keys, substitute others."""
import re

# None means "delete any line containing this key"; a string value means
# "replace the key with that string and keep the rest of the line".
replacements = {
    'ToPort': 'to_port',
    'FromPort': None
}
# Escape keys so they match literally inside the compiled alternations.
# (dict.items() replaces the py2-only iteritems(); same iteration here.)
replacements = {re.escape(k): v for k, v in replacements.items()}
remove_pattern = re.compile('|'.join(
    k for k, v in replacements.items() if v is None))
replace_pattern = re.compile('|'.join(
    k for k, v in replacements.items() if v is not None))


def rewrite(text):
    """Return one transformed line: '' for removal matches, otherwise
    *text* with every replacement rule applied."""
    if remove_pattern.search(text):
        return ''
    return replace_pattern.sub(
        lambda m: replacements[re.escape(m.group())], text)


def main(src='in.txt', dst='out.txt'):
    """Rewrite *src* line by line into *dst* (defaults match the
    original script's hard-coded file names)."""
    with open(src, 'rt') as fin, open(dst, 'wt') as fout:
        for line in fin:
            fout.write(rewrite(line))


if __name__ == '__main__':
    main()

Related

Elastic \ Opensearch life cycle management - what is the difference between read_write & open actions

I want to use life cycle management, the goal is to delete messages after 14 days
What should be the action in the first stage? Open or Read_write
What is the difference between the two actions?
{
"policy": {
"policy_id": "delete_after14_days",
"description": "index delete",
"schema_version": 1,
"error_notification": null,
"default_state": "open",
"states": [
{
"name": "hot",
"actions": [
{
**"open": {} or "read_write": {}**
}
],
"transitions": [
{
"state_name": "delete",
"conditions": {
"min_index_age": "14d"
}
}
]
},
{
"name": "delete",
"actions": [
{
"delete": {}
}
],
"transitions": []
}
],
"ism_template": [
{
"index_patterns": [
"audit-*"
],
"priority": 0
}
]
}
}

How to search key by passing value in json_query Ansible

I am calling an API and getting the output below. Given an input value ("vpc-tz"), I want to find the corresponding key. How can I achieve this in Ansible using json_query?
{
"json": {
"allScopes": [
{
"
"clusters": {
"clusters": [
{
"cluster": {
"clientHandle": "",
"type": {
"name": "ClusterComputeResource"
},
"universalRevision": 0,
"vsmUuid": "423B1819-9495-4F10-A96A-6D8284E51B29"
}
}
]
},
"controlPlaneMode": "UNICAST_MODE",
"description": "",
"extendedAttributes": [
],
"id": "vdnscope-6",
"isTemporal": false,
"isUniversal": false,
"name": "vpc-tz",
"nodeId": "85e0073d-0e5a-4f04-889b-42df771aebf8",
"objectId": "vdnscope-6",
"objectTypeName": "VdnScope",
"revision": 0,
"type": {
"name": "VdnScope"
},
"universalRevision": 0,
"virtualWireCount": 0,
"vsmUuid": "423B1819-9495-4F10-A96A-6D8284E51B29"
},
]
}
}
Here is a query which works:
json.allScopes[?name=='vpc-tz'].name

I am using awk command but unable to print IPs in in column using shell script

I have a file "LM1" in below format and I wanted these to be printed in separated column like this below example
I have used below awk command to achieve this but I cannot extract IP4 field in csv file rest of the data is coming in columns.
I execute below command to achieve my requirements
I have a file "LM1" in below format and I wanted these to be printed in separated column like this below example
# Turn the key/value dump in LM1 into one tab-separated row per log source.
# -F": " splits input lines on ": "; OFS="\t" joins output fields with tabs.
awk -F": " -v OFS="\t" '
# Header row, printed once before any input is read.
BEGIN {print "CustName", "OS", "LM_Name", "Name", "IPv4", "Status" }
{
# Strip all double quotes, then leading blanks, so $1/$2 are bare key/value.
gsub(/"/,"")
sub(/^[[:blank:]]+/,"")
}
# Start of a new customer record: if the previous customer produced no
# data rows, print its name alone so it still appears in the report.
$1 == "customer_name" {
if ("customer_name" in data && !have_data)
print data["customer_name"]
have_data = 0
}
# Remember every key/value pair seen so far.  NOTE(review): array values
# such as local_ipv4 span several lines with no ": " separator, so $2 is
# empty for them — this is why the IP column comes out blank.
{
data[$1] = $2
}
# Once all required fields are present, emit one row and clear the
# per-source fields (customer_name is kept for following sources).
("os_type" in data) && ("local_hostname" in data) && ("name" in data) && ("local_ipv4" in data) && ("status" in data) {
print data["customer_name"], data["os_type"], data["local_hostname"], data["name"], data["local_ipv4"], data["status"]
delete data["os_type"]
delete data["local_hostname"]
delete data["name"]
delete data["local_ipv4"]
delete data["status"]
have_data = 1
}
# column -t aligns the tab-separated fields into readable columns.
' LM1 | column -s $'\t' -t > LM-Status-Report.csv
Expected (Output) data format:
Column A Column B Column C Column D
==========================================================
Customer Local_Hostname IP4 Addresse Status
==========================================================
ABC ABC-log-01 10.9.9.9,10.9.3.4 OK
ABC ABC-log-02 10.8.8.8 New
XYZ XYZ-log-01 10.10.2.2,10.2.4.6 Ok
XYZ XYZ-log-02 10.2.3.4 New
RAW Data existing (input) format:
customer_name: "ABC"
{
"syslog": {
"created": {
"at": 1478195183
"by": 0
}
"id": "886707D0-4069-1005-8535-0050568525D9"
"metadata": {
"local_hostname": "ABC-log-01"
"local_ipv4": [
"10.9.9.9"
"10.9.3.4"
]
"public_ipv4": [
"127.0.0.1"
"10.1.1.1"
]
"total_mem_mb": 3884
}
"modified": {
"at": 1478195247
"by": 0
}
"name": "ABC-log-01"
"policy_id": "9125663A-04EA-4F1D-A436-ADFEF069D4BA"
"stats": {
"last_day_bytes": 0
"last_update_ts": 0
}
"status": {
"details": []
"status": "ok"
}
}
}
{
"eventlog": {
"created": {
"at": 1499888362
"by": 0
}
"id": "A4D2EA92-5423-1005-B1B3-0050568505BC"
"metadata": {
"os_type": "windows"
"local_hostname": "ABC-log-02"
"local_ipv4": [
"10.8.8.8"
]
"num_logical_processors": 2
"os_details": "Windows Server (R) 2008 Standard; 6.0.6002; Service Pack 2; x86"
"public_ipv4": [
"10.1.1.1"
]
"public_ipv4": [
"10.4.4.4"
]
"total_mem_mb": 2046
}
"modified": {
"at": 1512582221
"by": 13939
}
"name": "ABC-log-03"
"stats": {
"last_day_bytes": 0
}
"status": {
"details": []
"status": "new"
"timestamp": 1508228598
"updated": 1508228598
}
"tags": []
}
}
customer_name: "XYZ"
{
"syslog": {
"created": {
"at": 1507196910
"by": 0
}
"id": "9E47B629-5AC9-1005-B1B3-0050568505BC"
"metadata": {
"host_type": "standalone"
"os_type": "unix"
"version": "1"
"local_hostname": "XYZ-log-01"
"local_ipv6": [
"10.10.1.2"
"10.10.2.3"
]
"num_logical_processors": 4
"os_details": "Linux; 2.6.32-696.6.3.el6.x86_64; #1 SMP Wed Jul 12 14:17:22 UTC 2017; x86_64"
"public_ipv4": [
"10.10.2.2"
"10.2.4.6"
]
"total_mem_mb": 3951
}
"modified": {
"at": 1507196910
"by": 0
}
"name": "XYZ-log-01"
"policy_id": "7135663A-04EA-4F1D-A436-ADFEF069D4BA"
"stats": {
"last_day_bytes": 0
"last_update_ts": 0
}
"status": {
"details": []
"status": "ok"
"timestamp": 1508228712
"updated": 1519723490
}
"tags": []
}
}
{
"eventlog": {
"created": {
"at": 1507196961
"by": 0
}
"id": "2F417043-5AC9-1005-B1B3-0050568505BC"
"metadata": {
"host_type": "standalone"
"os_type": "windows"
"version": "1"
"local_hostname": "XYZ-log-02"
"local_ipv4": [
"10.2.3.4"
]
"num_logical_processors": 2
"os_details": "Windows Server (R) 2008 Standard; 6.0.6002; Service Pack 2; x86"
"public_ipv4": [
"10.2.3.4"
]
"public_ipv6": [
"*.*.*.*"
]
"total_mem_mb": 2046
}
"modified": {
"at": 1507196961
"by": 0
}
"name": "XYZ-log-02"
"stats": {
"last_day_bytes": 0
"last_update_ts": 0
}
"status": {
"details": []
"status": "new"
"timestamp": 1508228722
"updated": 1508228722
}
"tags": []
}
}
All you have to do to print IPs is to replace this block:
{
data[$1] = $2
}
with this one:
# Fold multi-line JSON arrays (e.g. local_ipv4) into a single string:
# when $2 is "[", start collecting under key $1; until the matching "]",
# append each element to data[list] with a leading comma; any other line
# is stored as a plain key/value pair.
{
if ($2 == "[")
list = $1
else if (list && $1 =="]")
list = 0
else if (list)
data[list] = data[list] "," $1
else
data[$1] = $2
}
This will parse all the JSON lists in your data into single strings, in which all list elements will be joined by comma. So this will work not only for local_ipv4, but also for public_ipv6 and others.
And then you just need to get rid of an extra comma. So instead of:
print data["customer_name"], data["os_type"], data["local_hostname"], data["name"], data["local_ipv4"], data["status"]
use substr(data["local_ipv4"],2) function to remove the comma in front:
print data["customer_name"], data["os_type"], data["local_hostname"], data["name"], substr(data["local_ipv4"],2), data["status"]
At least, this is a simple way of doing it based on your code.

Orientdb Slow import large dataset how to make it faster?

I'm working on a network of 17M edges and 20K vertices, and I'm loading it into OrientDB using the ETL tool, but it is taking forever to load.
I tried the batch varying from 1000 to 100000 , yet still no change .
Is there an optimized way to make it load faster, other than using the Java API?
Any help would be appreciated.
I'm using the 2.2.20 community version.
Here is the ETL config for the import:
{
"source": { "file": { "path": "C:/Users/Muuna/Desktop/files/H.csv" } },
"extractor": { "csv": {
"separator": ",",
"columnsOnFirstLine": true,
"ignoreEmptyLines": true,
"columns": ["id:Integer","p1:String","p2:String","s:Integer"] } },
"transformers": [
{ "command": { "command": "UPDATE H set p='${input.p1}' UPSERT WHERE p='${input.p1}'"},"vertex": { "class": "H", "skipDuplicates": true} }
],
"loader": {
"orientdb": {
"dbURL": "PLOCAL:C:/orientdb/databases/Graph",
"dbUser": "admin",
"dbPassword": "admin",
"dbType": "graph",
"classes": [
{"name": "H", "extends": "V"},
{"name": "HAS_S", "extends": "E"}
],"indexes": [ {"class":"H", "fields":["p:String"], "type":"UNIQUE" }]
}
}
}
Based on [1]: orientdb load graph csv of nodes and edges
The same script is run twice to import the two sets of vertices, and a separate ETL config is used for loading the edges.
Edges:
Based on [Ref][1]:
{
"source": { "file": { "path": "C:/Users/Muuna/Desktop/files/H.csv" } },
"extractor": { "csv": {
"separator": ",",
"columnsOnFirstLine": true,
"ignoreEmptyLines": true,
"columns": ["id:Integer","p1:String","p2:String","s:Integer"] } },
"transformers": [
{ "command": { "command": "CREATE EDGE HAS_S FROM (SELECT FROM H WHERE p='${input.p1}') TO (SELECT FROM H WHERE p='${input.p2}') set score=${input.s}"} }
],
"loader": {
"orientdb": {
"dbURL": "PLOCAL:C:/orientdb/databases/Graph",
"dbUser": "admin",
"dbPassword": "admin",
"dbType": "graph",
"classes": [
{"name": "H", "extends": "V"},
{"name": "HAS_S", "extends": "E"}
],"indexes": [ {"class":"H", "fields":["p:String"], "type":"UNIQUE" }]
}
}
}

Error Parsing JSON from YELP API

I am having trouble parsing JSON from Yelp API. The JSON data looks like this:
{
region: {
span: {
latitude_delta: 0,
longitude_delta: 0
},
center: {
latitude: 38.054117,
longitude: -84.439002
}
},
total: 23,
businesses: [
{
is_claimed: false,
rating: 5,
mobile_url: "http://m.yelp.com/biz/vineyard-community-church-lexington",
rating_img_url: "http://s3-media1.ak.yelpcdn.com/assets/2/www/img/f1def11e4e79/ico/stars/v1/stars_5.png",
review_count: 2,
name: "Vineyard Community Church",
snippet_image_url: "http://s3-media4.ak.yelpcdn.com/photo/VoeMtbk7NRFi6diksSUtOQ/ms.jpg",
rating_img_url_small: "http://s3-media1.ak.yelpcdn.com/assets/2/www/img/c7623205d5cd/ico/stars/v1/stars_small_5.png",
url: "http://www.yelp.com/biz/vineyard-community-church-lexington",
phone: "8592582300",
snippet_text: "I have been a member of Vineyard Community Church since 2004. Here you will find a modern worship service with a full band, witty speakers who teach...",
image_url: "http://s3-media3.ak.yelpcdn.com/bphoto/D71eikniuaHjdOC8DB6ziA/ms.jpg",
categories: [
[
"Churches",
"churches"
]
],
display_phone: "+1-859-258-2300",
rating_img_url_large: "http://s3-media3.ak.yelpcdn.com/assets/2/www/img/22affc4e6c38/ico/stars/v1/stars_large_5.png",
id: "vineyard-community-church-lexington",
is_closed: false,
location: {
city: "Lexington",
display_address: [
"1881 Eastland Pwky",
"Lexington, KY 40505"
],
geo_accuracy: 8,
postal_code: "40505",
country_code: "US",
address: [
"1881 Eastland Pwky"
],
coordinate: {
latitude: 38.054117,
longitude: -84.439002
},
state_code: "KY"
}
}
]
}
The JSON is stored in a Ruby instance variable called @stuff
Here is the code I use to try and parse it:
@parsed_stuff = JSON::parse(@stuff)
When I do this and try to display the contents of @parsed_stuff, I get the following error in the browser
Parse error on line 2: { "region"=>{ "span -------------^ Expecting '}', ':', ',', ']'
Any help given on this issue will be highly appreciated.
Use jsonlint for validating JSON. Here you have to give all keys as a string.
Try it
{
"region": {
"span": {
"latitude_delta": 0,
"longitude_delta": 0
},
"center": {
"latitude": 38.054117,
"longitude": -84.439002
}
},
"total": 23,
"businesses": [
{
"is_claimed": false,
"rating": 5,
"mobile_url": "http://m.yelp.com/biz/vineyard-community-church-lexington",
"rating_img_url": "http://s3-media1.ak.yelpcdn.com/assets/2/www/img/f1def11e4e79/ico/stars/v1/stars_5.png",
"review_count": 2,
"name": "Vineyard Community Church",
"snippet_image_url": "http://s3-media4.ak.yelpcdn.com/photo/VoeMtbk7NRFi6diksSUtOQ/ms.jpg",
"rating_img_url_small": "http://s3-media1.ak.yelpcdn.com/assets/2/www/img/c7623205d5cd/ico/stars/v1/stars_small_5.png",
"url": "http://www.yelp.com/biz/vineyard-community-church-lexington",
"phone": "8592582300",
"snippet_text": "I have been a member of Vineyard Community Church since 2004. Here you will find a modern worship service with a full band, witty speakers who teach...",
"image_url": "http://s3-media3.ak.yelpcdn.com/bphoto/D71eikniuaHjdOC8DB6ziA/ms.jpg",
"categories": [
[
"Churches",
"churches"
]
],
"display_phone": "+1-859-258-2300",
"rating_img_url_large": "http://s3-media3.ak.yelpcdn.com/assets/2/www/img/22affc4e6c38/ico/stars/v1/stars_large_5.png",
"id": "vineyard-community-church-lexington",
"is_closed": false,
"location": {
"city": "Lexington",
"display_address": [
"1881 Eastland Pwky",
"Lexington, KY 40505"
],
"geo_accuracy": 8,
"postal_code": "40505",
"country_code": "US",
"address": [
"1881 Eastland Pwky"
],
"coordinate": {
"latitude": 38.054117,
"longitude": -84.439002
},
"state_code": "KY"
}
}
]
}

Resources