Extracting data w/ Scrapy / XPath - xpath

I created the following spider which leads to these issues when running it:
The headline is "cut" — probably due to the <em> tag inside
The location contains spaces and \n
Currently struggling to find a solution for these two remaining issues.
class GitHubSpider(scrapy.Spider):
    """Spider that scrapes the user rows of a GitHub user-search result page."""

    name = "github"
    start_urls = ["https://github.com/search?p=1&q=React+Django&type=Users"]

    def parse(self, response):
        """Yield one dict (link, name, headline, location) per ``.Box-row``."""
        for row in response.css(".Box-row"):
            profile_href = row.css(".mr-1::attr(href)").get()
            display_name = row.css(".mr-1::text").get()
            # ``::text`` with ``.get()`` yields only the first text node of
            # the match, so text after a nested tag is not included.
            headline_text = row.css(".mb-1::text").get()
            # The raw text node is returned as-is, surrounding whitespace
            # and newlines included.
            location_text = row.css(".mr-3:nth-child(1)::text").get()
            yield {
                "github_link": profile_href,
                "name": display_name,
                "headline": headline_text,
                "location": location_text,
            }
Expected result
# 2021-08-07 11:59:29 [scrapy.core.scraper] DEBUG: Scraped from <200 https://github.com/search?p=1&q=React+Django&type=Users>
{
'github_link': '/djangofan',
'name': 'Jon Austen',
'headline': 'Software Engineer interested in Java, Python, Ruby, Groovy, Bash, Clojure, React-Native, and Docker. Focus: Testing, CI, and Micro-Services.',
'location': 'Portland, OR'
}
# 2021-08-07 11:59:29 [scrapy.core.scraper] DEBUG: Scraped from <200 https://github.com/search?p=1&q=React+Django&type=Users>
{
'github_link': '/django-wong',
'name': ' Wong',
'headline': 'PHP / Node.js / Dart (Flutter) / React Native / Scala',
'location': 'China'
}
[...]
Actual Result
# 2021-08-07 11:59:29 [scrapy.core.scraper] DEBUG: Scraped from <200 https://github.com/search?p=1&q=React+Django&type=Users>
{
'github_link': '/djangofan',
'name': 'Jon Austen',
'headline': 'Software Engineer interested in Java, Python, Ruby, Groovy, Bash, Clojure, ',
'location': '\n Portland, OR\n '
}
# 2021-08-07 11:59:29 [scrapy.core.scraper] DEBUG: Scraped from <200 https://github.com/search?p=1&q=React+Django&type=Users>
{
'github_link': '/django-wong',
'name': ' Wong',
'headline': 'PHP / Node.js / Dart (Flutter) / ',
'location': '\n China\n '
}
[...]

Here is the solution:
CODE:
import scrapy
class GitHubSpider(scrapy.Spider):
    """Spider that scrapes the user rows of a GitHub user-search result page."""

    name = "github"
    start_urls = [
        "https://github.com/search?p=1&q=React+Django&type=Users",
    ]

    def parse(self, response):
        """Yield one dict (link, name, headline, location) per result row.

        All XPath expressions start with ``.`` so they are evaluated relative
        to the current row rather than the whole document.
        """
        for github in response.xpath('//*[@class="flex-auto"]'):
            yield {
                "github_link": github.xpath(
                    './/*[@class="color-text-secondary"]/@href'
                ).get(),
                "name": github.xpath('.//*[@class="mr-1"]/text()').get(),
                # Passing the element itself (not its text() nodes) to
                # normalize-space() uses the element's string value, i.e. the
                # concatenation of ALL descendant text, so text inside and
                # after a nested <em> is kept. `//text()` + `.get()` would
                # return only the first text node (the truncated headline).
                # Like "location", a missing headline yields '' (not None).
                "headline": github.xpath(
                    'normalize-space(.//*[@class="mb-1"])'
                ).get(),
                # normalize-space() trims the leading/trailing whitespace and
                # newlines around the location text.
                "location": github.xpath(
                    'normalize-space(.//*[@class="mr-3"]/text())'
                ).get(),
            }
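OUTPUT (captured with the `//text()` + `.get()` headline selector, which returns only the first text node, so the headlines below are still cut at the nested <em> tag):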
{'github_link': '/djangofan', 'name': 'Jon Austen', 'headline': 'Software Engineer interested in Java, Python, Ruby, Groovy, Bash, Clojure, ', 'location': 'Portland, OR'}
2021-08-07 17:19:10 [scrapy.core.scraper] DEBUG: Scraped from <200 https://github.com/search?p=1&q=React+Django&type=Users>
{'github_link': '/django-wong', 'name': ' Wong', 'headline': 'PHP / Node.js / Dart (Flutter) / ', 'location': 'China'}
2021-08-07 17:19:10 [scrapy.core.scraper] DEBUG: Scraped from <200 https://github.com/search?p=1&q=React+Django&type=Users>
{'github_link': '/DipanshKhandelwal', 'name': 'Dipansh Khandelwal', 'headline': 'React', 'location': 'Bengaluru, India'}
2021-08-07 17:19:10 [scrapy.core.scraper] DEBUG: Scraped from <200 https://github.com/search?p=1&q=React+Django&type=Users>
{'github_link': '/usj-django-dev', 'name': 'Utsho Sadhak Joy', 'headline': 'const Joy = (', 'location': 'Khulna,Bangladesh'}
2021-08-07 17:19:10 [scrapy.core.scraper] DEBUG: Scraped from <200 https://github.com/search?p=1&q=React+Django&type=Users>
{'github_link': '/kpnemre', 'name': 'Emre Kapan', 'headline': 'React', 'location': ''}
2021-08-07 17:19:10 [scrapy.core.scraper] DEBUG: Scraped from <200 https://github.com/search?p=1&q=React+Django&type=Users>
{'github_link': '/indraasura', 'name': 'Swarup Hegde', 'headline': 'Proficient in JavaScript, Python, ', 'location': 'Indore, India'}
2021-08-07 17:19:10 [scrapy.core.scraper] DEBUG: Scraped from <200 https://github.com/search?p=1&q=React+Django&type=Users>
{'github_link': '/pongstr', 'name': 'Pongstr', 'headline': 'Vue. ', 'location': 'Tallinn, Estonia'}
2021-08-07 17:19:10 [scrapy.core.scraper] DEBUG: Scraped from <200 https://github.com/search?p=1&q=React+Django&type=Users>
{'github_link': '/wencakisa', 'name': 'Ventsislav Tashev', 'headline': 'Django', 'location': 'Sofia, Bulgaria'}
2021-08-07 17:19:10 [scrapy.core.scraper] DEBUG: Scraped from <200 https://github.com/search?p=1&q=React+Django&type=Users>
{'github_link': '/novelview9', 'name': 'GarakdongBigBoy', 'headline': 'Django', 'location': ''}
2021-08-07 17:19:10 [scrapy.core.scraper] DEBUG: Scraped from <200 https://github.com/search?p=1&q=React+Django&type=Users>
{'github_link': '/willemarcel', 'name': 'Wille Marcel', 'headline': 'Software engineer. Python, ', 'location': 'Salvador-BA-Brazil'}
2021-08-07 17:19:10 [scrapy.core.engine] INFO: Closing spider (finished)
2021-08-07 17:19:10 [scrapy.statscollectors] INFO: Dumping Scrapy stats:
{'downloader/request_bytes': 327,
'downloader/request_count': 1,
'downloader/request_method_count/GET': 1,
'downloader/response_bytes': 24422,
'downloader/response_count': 1,
'downloader/response_status_count/200': 1,
'elapsed_time_seconds': 1.271329,
'finish_reason': 'finished',
'finish_time': datetime.datetime(2021, 8, 7, 11, 19, 10, 249326),
'httpcompression/response_bytes': 132530,
'httpcompression/response_count': 1,
'item_scraped_count': 10,

The first problem can be fixed with xpath and string().
The second problem can be fixed with strip().
class GitHubSpider(scrapy.Spider):
    """Spider that scrapes the user rows of a GitHub user-search result page."""

    name = "github"
    start_urls = [
        "https://github.com/search?p=1&q=React+Django&type=Users",
    ]

    def strip_string(self, string):
        """Return *string* without surrounding whitespace, or None if it is None."""
        if string is not None:
            return string.strip()
        return None

    def parse(self, response):
        """Yield one dict (link, name, headline, location) per ``.Box-row``."""
        for github in response.css(".Box-row"):
            github_link = self.strip_string(github.css(".mr-1::attr(href)").get())
            name = self.strip_string(github.css(".mr-1::text").get())
            # string() returns the full text of the <p>, including text inside
            # nested tags such as <em>. The leading "." keeps the lookup
            # relative to the current row; an absolute "//p[...]" would be
            # evaluated against the whole document and give every row the
            # first headline on the page.
            headline = self.strip_string(
                github.xpath('string(.//p[@class="mb-1"])').get()
            )
            location = self.strip_string(
                github.css(".mr-3:nth-child(1)::text").get()
            )
            yield {
                "github_link": github_link,
                "name": name,
                "headline": headline,
                "location": location,
            }

Related

Loop into Register variables in Ansible Playbook to generate json payloads

I am developing an Ansible playbook and need to loop over a registered variable to generate a JSON payload from a template, so that I can send it later through an API.
My task :
- name: check bad directories permissions
shell: /tmp/check_permission.sh -d
register: dir_output
- name : plugin output
debug:
msg: "{{dir_output}}"
results of debug :
ok: [hostname1] => {}
MSG:
{'changed': True, 'end': '2022-07-21 12:37:10.039682', 'stdout': 'OK : No folders with wrong permissions\n\n0', 'cmd': '/tmp/check_permission.sh -d', 'failed': True, 'delta': '0:00:00.759062', 'stderr': '', 'rc': 1, 'start': '2022-07-21 12:37:09.280620', 'msg': 'non-zero return code', 'stdout_lines': ['OK : No folders with wrong permissions', '', '0'], 'stderr_lines': []}
ok: [hostname2] => {}
MSG:
{'changed': True, 'end': '2022-07-21 12:37:07.263145', 'stdout': 'OK : No folders with wrong permissions\n\n0', 'cmd': '/tmp/check_permission.sh -d', 'failed': True, 'delta': '0:00:00.396584', 'stderr': '', 'rc': 1, 'start': '2022-07-21 12:37:06.866561', 'msg': 'non-zero return code', 'stdout_lines': ['OK : No folders with wrong permissions', '', '0'], 'stderr_lines': []}
My Loop :
name: Generating json paylod
template:
src: "check_template3.json.j2"
dest: "/tmp/check_template3.json"
loop:
- "{{dir_output}}"
run_once: yes
delegate_to: localhost
tags: render exit_status
my template :
{
results: "{{item}}"
}
My .json results :
{
results: "{'changed': True, 'end': '2022-07-21 12:37:10.039682', 'stdout': 'OK : No folders with wrong permissions\n\n0', 'cmd': '/tmp/check_permission.sh -d', 'failed': True, 'delta': '0:00:00.759062', 'stderr': '', 'rc': 1, 'start': '2022-07-21 12:37:09.280620', 'msg': 'non-zero return code', 'stdout_lines': ['OK : No folders with wrong permissions', '', '0'], 'stderr_lines': []}"
}
expected results :
i need to loop on register and see two output messages on my json
{
results: "{'changed': True, 'end': '2022-07-21 12:37:10.039682', 'stdout': 'OK : No folders with wrong permissions\n\n0', 'cmd': '/tmp/check_permission.sh -d', 'failed': True, 'delta': '0:00:00.759062', 'stderr': '', 'rc': 1, 'start': '2022-07-21 12:37:09.280620', 'msg': 'non-zero return code', 'stdout_lines': ['OK : No folders with wrong permissions', '', '0'], 'stderr_lines': []}"
results: "second results of the second hostname"
}
Any idea, thanks
Try
- debug:
var: content
vars:
content: |-
{
{% for host in ansible_play_hosts %}
results: {{ hostvars[host]['dir_output']|to_json }}
{% endfor %}
}
run_once: true
The content is not a valid YAML/JSON.

Ansible Filter with_items list

I am using Ansible 2.10.7 and I need to filter a specific item in the with_items list.
Before using with_items msg looks like this:
"ansible_facts": {
"cvp_info": {
"version": "2020.2.3"
},
"devices": [
{
"architecture": "",
"bootupTimeStamp": 1615810038.137913,
"bootupTimestamp": 1615810038.137913,
"complianceCode": "",
Playbook:
---
- name: Playbook to demonstrate cv_container module.
hosts: cvp_servers
connection: local
gather_facts: no
collections:
- arista.cvp
vars:
vars_files:
- vars.yml
tasks:
- name: "collecting facts from CVP {{inventory_hostname}}"
arista.cvp.cv_facts:
facts:
devices
- name: "Print out facts from CVP"
debug:
msg: "{{item.name}}"
with_items: "{{devices}}"
After using the with_items: "{{devices}}", I see it is filtering the big list and then I get this output which I want to filter:
ok: [hq] => (item={'hostname': 'rd-sw055', 'danzEnabled': False, 'mlagEnabled': False, 'streamingStatus': 'active', 'status': 'Registered','bootupTimeStamp': 1605618537.210405, 'internalBuildId': '8c8dfbf2-a4d1-420a-9c9c-59f6aa67a14e', 'taskIdList': [], 'tempAction': None, 'memTotal': 0, 'memFree': 0, 'sslConfigAvailable': False, 'sslEnabledByCVP': False, 'lastSyncUp': 0, 'type': 'netelement', 'dcaKey': None, 'containerName': 'HQ',
'name': 'rd-sw055','deviceSpecificConfiglets': ['rd-sw055'], 'imageBundle': ''}) => {
"msg": "rd-sw055"
ok: [hq] => (item={'hostname': 'rd-sw01', 'danzEnabled': False, 'mlagEnabled': False, 'streamingStatus': 'active', 'status': 'Registered','bootupTimeStamp': 1605618537.210405, 'internalBuildId': '8c8dfbf2-a4d1-420a-9c9c-59f6aa67a14e', 'taskIdList': [], 'tempAction': None, 'memTotal': 0, 'memFree': 0, 'sslConfigAvailable': False, 'sslEnabledByCVP': False, 'lastSyncUp': 0, 'type': 'netelement', 'dcaKey': None, 'containerName': 'HQ',
'name': 'rd-sw01','deviceSpecificConfiglets': ['rd-sw01'], 'imageBundle': ''}) => {
"msg": "rd-sw01"
I want it to show only the item with the 'name': 'rd-sw01' how can I do it?
I have tried using
loop_control:
label: '{{ item.name }}'
At the end of the playbook but this will only show name value and not the whole item values.
End result wanted:
ok: [hq] => (item={'hostname': 'rd-sw01', 'danzEnabled': False, 'mlagEnabled': False, 'streamingStatus': 'active', 'status': 'Registered','bootupTimeStamp': 1605618537.210405, 'internalBuildId': '8c8dfbf2-a4d1-420a-9c9c-59f6aa67a14e', 'taskIdList': [], 'tempAction': None, 'memTotal': 0, 'memFree': 0, 'sslConfigAvailable': False, 'sslEnabledByCVP': False, 'lastSyncUp': 0, 'type': 'netelement', 'dcaKey': None, 'containerName': 'HQ',
'name': 'rd-sw01','deviceSpecificConfiglets': ['rd-sw01'], 'imageBundle': ''}) => {
"msg": "rd-sw01"
You do want a when condition here:
- debug:
var: item
loop: "{{ devices }}"
when: item.name == 'rd-sw01'
loop_control:
label: "{{ item.name }}"
Or even, simpler, skip the loop:
- debug:
var: devices | selectattr("name", "eq", "rd-sw01")

How to extract items with Ansible from stdout using json_query

I am executing shell script with Ansible which returns json output.
- name: Get mlist
become: no
shell: "PYTHONPATH=/home/centos/scripts/users/ python /home/centos/scripts/users/team_members.py {{ parameter }}"
register: account_list
the output looks like
ok: [localhost] => {
"msg": {
"changed": true,
"cmd": "PYTHONPATH=/home/centos/scripts/users/ python /home/centos/scripts/users/team_members.py parameter",
"delta": "0:00:00.530377",
"end": "2019-10-09 08:28:20.222480",
"failed": false,
"rc": 0,
"start": "2019-10-09 08:28:19.692103",
"stderr": "2019-10-09 08:28:19,915 INFO",
"stderr_lines": [
"2019-10-09 08:28:19,915 INFO"
],
"stdout": "[{'id': 'XXX=', 'name': 'XXX', 'login': 'xxx'}, {'id': 'YYY', 'name': 'YYY', 'login': 'yyy'}, {'id': 'ZZZ', 'name': 'zzz', 'login': 'zzz'}]",
"stdout_lines": [
"[{'id': 'XXX=', 'name': 'XXX', 'login': 'xxx'}, {'id': 'YYY', 'name': 'YYY', 'login': 'yyy'}, {'id': 'ZZZ', 'name': 'ZZZ', 'login': 'zzz'}]"
]
}
}
What I would like to do is to extract all login items. So far I've tried various ways
- debug:
msg: "{{ account_list.stdout | to_json | json_query('[*].login') }}"
But this is not working. While putting the .stdout to JSONPath Online Evaluator the .[*].login does what I want I just can't do that with Ansible json_query. Anybody who know how to do that ?
Thank you very much in advance.
I found the solution. I had to update the Python script to actually output JSON, as previously it didn't output correct JSON (see the use of ' instead of "), and then
- debug:
msg: "{{ item }}"
with_items: "{{ account_list.stdout | from_json | json_query('[*].login') }}"
worked correctly

How to append bot's reply into web-chat?

I am unable to get the response from the bot and append it to the web-chat provided by the MS Botframework.
Here's what I have done so far:
Created a bot on https://dev.botframework.com/ (without migration)
Integrated Web Chat and Direct Line
Created a flask listener server and generated HTTPS using ngrok
Gave created server's address as messaging endpoint for the bot
Generated a HTML page using below code (running locally without any server)
<html>
<head>
<link href="https://cdn.botframework.com/botframework-webchat/latest/botchat.css" rel="stylesheet" />
</head>
<body>
<div id="bot"/>
<script src="https://cdn.botframework.com/botframework-webchat/latest/botchat.js"></script>
<script>
BotChat.App({
directLine: { secret: 'direct_line_secret_key' },
user: { id: 'userid' },
bot: { id: 'botid' },
resize: 'detect'
}, document.getElementById("bot"));
</script>
</body>
</html>
Now I am able to send message from the UI as user, and able to capture it in the flask listener server.
But how do I reply from the server so that message will come from bot and append to the UI?
Am I missing something?
I have tried https://directline.botframework.com/v3/directline/conversations/{convoId}/activities to POST a request but that again comes back to server only and does not append to web-chat.
data = json.dumps({"type": "message","from":{"id":"botid"},"text": "Hii!"})
requests.post('https://directline.botframework.com/v3/directline/conversations/' + r['conversation']['id'] + '/activities',
headers={"Authorization": "Bearer " + "secret_key", "Content-Type": "application/json", "Content-Length": "512"},data=data
NOTE: I am using botframework only for the web-chat UI and nothing else.
EDIT 1:
sent this from web-chat ui
http://bcebb07a.ngrok.io/webhook' [POST]>
************************************************************************** {'type': 'message', 'id': '5DvIa5ImiPF4G6WnGlPYyY|0000002',
'timestamp': '2018-06-05T06:51:45.3174659Z', 'serviceUrl':
'https://directline.botframework.com/', 'channelId': 'directline',
'from': {'id': 'sid'}, 'conversation': {'id':
'5DvIa5ImiPF4G6WnGlPYyY'}, 'recipient': {'id':
'one_assist#CrbpWod1mw8', 'name': 'OneAssist'}, 'textFormat': 'plain',
'locale': 'en-US', 'text': 'hello how are you', 'channelData':
{'clientActivityId': '1528180308547.7414264322396316.5'}}
5DvIa5ImiPF4G6WnGlPYyY
#################################################################### {'messages': [{'id':
'5DvIa5ImiPF4G6WnGlPYyY|0000000', 'conversationId':
'5DvIa5ImiPF4G6WnGlPYyY', 'created': '2018-06-05T06:32:50.9651813Z',
'from': 'sid', 'text': 'hello', 'channelData': {'clientActivityId':
'1528180308547.7414264322396316.0'}, 'images': [], 'attachments': []},
{'id': '5DvIa5ImiPF4G6WnGlPYyY|0000001', 'conversationId':
'5DvIa5ImiPF4G6WnGlPYyY', 'created': '2018-06-05T06:47:14.1602925Z',
'from': 'sid', 'text': 'how are you', 'channelData':
{'clientActivityId': '1528180308547.7414264322396316.3'}, 'images':
[], 'attachments': []}, {'id': '5DvIa5ImiPF4G6WnGlPYyY|0000002',
'conversationId': '5DvIa5ImiPF4G6WnGlPYyY', 'created':
'2018-06-05T06:51:45.3174659Z', 'from': 'sid', 'text': 'hello how are
you', 'channelData': {'clientActivityId':
'1528180308547.7414264322396316.5'}, 'images': [], 'attachments':
[]}], 'watermark': '2'}
{ "error": {
"code": "BotError",
"message": "Failed to send activity: bot timed out" }, "httpStatusCode": 504 }
127.0.0.1 - - [05/Jun/2018 12:22:12] "POST /webhook HTTP/1.1" 200 -
************************************************************************** http://bcebb07a.ngrok.io/webhook' [POST]>
************************************************************************** {'type': 'conversationUpdate', 'id': 'D3XJ6CAaVsc', 'timestamp':
'2018-06-05T06:51:56.7508828Z', 'serviceUrl':
'https://directline.botframework.com/', 'channelId': 'directline',
'from': {'id': '5DvIa5ImiPF4G6WnGlPYyY'}, 'conversation': {'id':
'5DvIa5ImiPF4G6WnGlPYyY'}, 'recipient': {'id':
'one_assist#CrbpWod1mw8', 'name': 'OneAssist'}, 'membersAdded':
[{'id': 'botid'}]} 5DvIa5ImiPF4G6WnGlPYyY
#################################################################### {'messages': [{'id':
'5DvIa5ImiPF4G6WnGlPYyY|0000000', 'conversationId':
'5DvIa5ImiPF4G6WnGlPYyY', 'created': '2018-06-05T06:32:50.9651813Z',
'from': 'sid', 'text': 'hello', 'channelData': {'clientActivityId':
'1528180308547.7414264322396316.0'}, 'images': [], 'attachments': []},
{'id': '5DvIa5ImiPF4G6WnGlPYyY|0000001', 'conversationId':
'5DvIa5ImiPF4G6WnGlPYyY', 'created': '2018-06-05T06:47:14.1602925Z',
'from': 'sid', 'text': 'how are you', 'channelData':
{'clientActivityId': '1528180308547.7414264322396316.3'}, 'images':
[], 'attachments': []}, {'id': '5DvIa5ImiPF4G6WnGlPYyY|0000002',
'conversationId': '5DvIa5ImiPF4G6WnGlPYyY', 'created':
'2018-06-05T06:51:45.3174659Z', 'from': 'sid', 'text': 'hello how are
you', 'channelData': {'clientActivityId':
'1528180308547.7414264322396316.5'}, 'images': [], 'attachments':
[]}], 'watermark': '2'}
{ "error": {
"code": "BotError",
"message": "Failed to send activity: bot timed out" }, "httpStatusCode": 504 }
127.0.0.1 - - [05/Jun/2018 12:22:32] "POST /webhook HTTP/1.1" 200 -
# sent this from the server code
************************************************************************** http://bcebb07a.ngrok.io/webhook' [POST]>
************************************************************************** {'type': 'message', 'id': '5DvIa5ImiPF4G6WnGlPYyY|0000003',
'timestamp': '2018-06-05T06:52:16.6925987Z', 'serviceUrl':
'https://directline.botframework.com/', 'channelId': 'directline',
'from': {'id': 'botid'}, 'conversation': {'id':
'5DvIa5ImiPF4G6WnGlPYyY'}, 'recipient': {'id':
'one_assist#CrbpWod1mw8', 'name': 'OneAssist'}, 'text': 'Hii!'}
5DvIa5ImiPF4G6WnGlPYyY
#################################################################### {'messages': [{'id':
'5DvIa5ImiPF4G6WnGlPYyY|0000000', 'conversationId':
'5DvIa5ImiPF4G6WnGlPYyY', 'created': '2018-06-05T06:32:50.9651813Z',
'from': 'sid', 'text': 'hello', 'channelData': {'clientActivityId':
'1528180308547.7414264322396316.0'}, 'images': [], 'attachments': []},
{'id': '5DvIa5ImiPF4G6WnGlPYyY|0000001', 'conversationId':
'5DvIa5ImiPF4G6WnGlPYyY', 'created': '2018-06-05T06:47:14.1602925Z',
'from': 'sid', 'text': 'how are you', 'channelData':
{'clientActivityId': '1528180308547.7414264322396316.3'}, 'images':
[], 'attachments': []}, {'id': '5DvIa5ImiPF4G6WnGlPYyY|0000002',
'conversationId': '5DvIa5ImiPF4G6WnGlPYyY', 'created':
'2018-06-05T06:51:45.3174659Z', 'from': 'sid', 'text': 'hello how are
you', 'channelData': {'clientActivityId':
'1528180308547.7414264322396316.5'}, 'images': [], 'attachments': []},
{'id': '5DvIa5ImiPF4G6WnGlPYyY|0000003', 'conversationId':
'5DvIa5ImiPF4G6WnGlPYyY', 'created': '2018-06-05T06:52:16.6925987Z',
'from': 'botid', 'text': 'Hii!', 'images': [], 'attachments': []}],
'watermark': '3'}
https://directline.botframework.com/v3/directline/conversations/5DvIa5ImiPF4G6WnGlPYyY/activities/
https://directline.botframework.com/v3/conversations/5DvIa5ImiPF4G6WnGlPYyY/activities
{ "error": {
"code": "BotError",
"message": "Failed to send activity: bot timed out" }, "httpStatusCode": 504 }
I had a similar issue while using ngrok and I notice in your logs there are references to 127.0.0.1. Try adding the -host-header parameter to your ngrok command line.
For example I use
ngrok http -host-header=localhost:55486 55486
Where 55486 is the port number my endpoint sits on.

Ansible - Register variable to item property in loop

I'm trying to register the output of a shell command to an item property inside a list of items.
This takes place during a loop, but the properties do not appear to be registered. After the task has run, the property still shows the value None. Am I doing something wrong, or is there another way to accomplish this?
Variables:
users:
- username: someguy
description: "Some Guy"
groups: ['sudo', 'guy']
new_id: 6001
old_uid:
old_gid:
user_exists:
password: waffles
- username: somedude
description: "Some Dude"
groups: ['dude']
new_id: 6002
old_uid:
old_gid:
user_exists:
password: toast
Tasks
---
- name: Check if user exists
shell: /usr/bin/getent passwd {{ item.username }} | /usr/bin/wc -l | tr -d ' '
with_items: "{{ users }}"
register: item.user_exists
- name: Check user current UID
shell: /usr/bin/id -u {{ item.username }}
with_items: "{{ users }}"
register: item.old_uid
when: item.user_exists == 1
- name: Check user current GID
shell: /usr/bin/id -g {{ item.username }}
with_items: "{{ users }}"
register: item.old_gid
when: item.user_exists == 1
Output
TASK: [users | Check if user exists] ******************************************
changed: [bserver] => (item={'username': 'someguy', 'password': 'waffles', 'description': 'Some Guy', 'new_id': 6001, 'groups': ['sudo', 'guy'], 'user_exists': None, 'old_uid': None, 'old_gid': None})
changed: [aserver] => (item={'username': 'someguy', 'password': 'waffles', 'description': 'Some Guy', 'new_id': 6001, 'groups': ['sudo', 'guy'], 'user_exists': None, 'old_uid': None, 'old_gid': None})
changed: [aserver] => (item={'username': 'somedude', 'password': 'toast', 'description': 'Some Dude', 'new_id': 6002, 'groups': ['dude'], 'user_exists': None, 'old_uid': None, 'old_gid': None})
changed: [bserver] => (item={'username': 'somedude', 'password': 'toast', 'description': 'Some Dude', 'new_id': 6002, 'groups': ['dude'], 'user_exists': None, 'old_uid': None, 'old_gid': None})
TASK: [users | Check user current UID] ****************************************
skipping: [aserver] => (item={'username': 'someguy', 'password': 'waffles', 'description': 'Some Guy', 'new_id': 6001, 'groups': ['sudo', 'guy'], 'user_exists': None, 'old_uid': None, 'old_gid': None})
skipping: [aserver] => (item={'username': 'somedude', 'password': 'toast', 'description': 'Some Dude', 'new_id': 6002, 'groups': ['dude'], 'user_exists': None, 'old_uid': None, 'old_gid': None})
skipping: [bserver] => (item={'username': 'someguy', 'password': 'waffles', 'description': 'Some Guy', 'new_id': 6001, 'groups': ['sudo', 'guy'], 'user_exists': None, 'old_uid': None, 'old_gid': None})
skipping: [bserver] => (item={'username': 'somedude', 'password': 'toast', 'description': 'Some Dude', 'new_id': 6002, 'groups': ['dude'], 'user_exists': None, 'old_uid': None, 'old_gid': None})
TASK: [users | Check user current GID] ****************************************
skipping: [aserver] => (item={'username': 'someguy', 'password': 'waffles', 'description': 'Some Guy', 'new_id': 6001, 'groups': ['sudo', 'guy'], 'user_exists': None, 'old_uid': None, 'old_gid': None})
skipping: [aserver] => (item={'username': 'somedude', 'password': 'toast', 'description': 'Some Dude', 'new_id': 6002, 'groups': ['dude'], 'user_exists': None, 'old_uid': None, 'old_gid': None})
skipping: [bserver] => (item={'username': 'someguy', 'password': 'waffles', 'description': 'Some Guy', 'new_id': 6001, 'groups': ['sudo', 'guy'], 'user_exists': None, 'old_uid': None, 'old_gid': None})
skipping: [bserver] => (item={'username': 'somedude', 'password': 'toast', 'description': 'Some Dude', 'new_id': 6002, 'groups': ['dude'], 'user_exists': None, 'old_uid': None, 'old_gid': None})
Unfortunately that is not how it works.
Your line register: "{{item.user_exists}}" would probably result in the result being registered in a variable called false.
You can not inject the result of a task into any object. The register feature will only take a string.
Additionally, register behaves completely differently in a loop. Instead of registering every iteration separately, it registers one single object which holds the results of all items under the key results.
You can still loop over your users and check for existence, but I don't think this will get you where you want to be.
First register the result in one single variable, here users_checked:
# Runs getent once per user; the combined outcome of all iterations is
# registered as a single variable whose `results` list has one entry per item.
- name: Check if user exists
  shell: /usr/bin/getent passwd {{ item.username }} | /usr/bin/wc -l | tr -d ' '
  # Template the variable explicitly: a bare name is treated as a literal
  # string by current Ansible versions.
  with_items: "{{ users }}"
  register: users_checked
I recommend you to work with the debug module to always check with what structure of data you're working.
- debug: var=users_checked
This will show you something like this:
"var": {
"users_checked": {
"changed": true,
"msg": "All items completed",
"results": [
{
"changed": true,
"cmd": "/usr/bin/getent passwd someguy | /usr/bin/wc -l | tr -d ' '",
"delta": "0:00:00.005415",
"end": "2015-09-08 20:59:33.379516",
"invocation": {
"module_args": "/usr/bin/getent passwd someguy | /usr/bin/wc -l | tr -d ' '",
"module_name": "shell"
},
"item": {
"description": "Some Guy",
"groups": [
"sudo",
"guy"
],
"new_id": 6001,
"old_gid": null,
"old_uid": null,
"password": "waffles",
"user_exists": null,
"username": "someguy"
},
"rc": 0,
"start": "2015-09-08 20:59:33.374101",
"stderr": "",
"stdout": "0",
"stdout_lines": [
"0"
],
"warnings": []
},
{
"changed": true,
"cmd": "/usr/bin/getent passwd somedude | /usr/bin/wc -l | tr -d ' '",
"delta": "0:00:00.006362",
"end": "2015-09-08 20:59:33.530546",
"invocation": {
"module_args": "/usr/bin/getent passwd somedude | /usr/bin/wc -l | tr -d ' '",
"module_name": "shell"
},
"item": {
"description": "Some Dude",
"groups": [
"dude"
],
"new_id": 6002,
"old_gid": null,
"old_uid": null,
"password": "toast",
"user_exists": null,
"username": "somedude"
},
"rc": 0,
"start": "2015-09-08 20:59:33.524184",
"stderr": "",
"stdout": "0",
"stdout_lines": [
"0"
],
"warnings": []
}
]
}
}
So the result holds not only the actual results of all items but also the input objects. That enables you to loop over users_checked.results instead of your original users list.
# Loops over the registered per-item results; the original user object of each
# iteration is available as item.item.
- name: Check user current UID
  shell: /usr/bin/id -u {{ item.item.username }}
  # Template the variable explicitly: a bare name is treated as a literal
  # string by current Ansible versions.
  with_items: "{{ users_checked.results }}"
  # stdout is a string ("0" or "1"), so cast it before comparing with an
  # integer; `item.stdout == 1` would never be true.
  when: item.stdout | int == 1
But now it would get nasty, since you want to check two more things. You probably could continue with this approach, register the above result, then use the registered data structure as input for the next loop, but you will end up with a deeply nested object and at the end having the original object somewhere in registered_var.results[x].item.results[y].item.results[z].item....
To avoid this mess you might want to look into creating a custom module for this task.

Resources