Custom sphinx role for references - python-sphinx

I would like to create a sphinx role :config:`param` that appears as literal text config["param"] and at the same time is a link.
I've a basic running version:
def config_role(name, rawtext, text, lineno, inliner, options={}, content=[]):
rendered = nodes.Text('config["{}"]'.format(text))
rel_source = inliner.document.attributes['source'].split('/doc/', 1)[1]
levels = rel_source.count('/')
refuri = ('../' * levels +
'tutorials/introductory/customizing.html#config')
ref = nodes.reference(rawtext, rendered, refuri=refuri)
return [nodes.literal('', '', ref)], []
def setup(app):
app.add_role("config", config_role)
return {"parallel_read_safe": True, "parallel_write_safe": True}
However, there are two issues:
The target of the link is in another part of the documentation. Therefore I have to create the refuri explicitly. Is is possible to use the link anchor somehow as one would do with :ref:`config` in .rst?
nodes.reference has the unwanted effect that the double-quotes are replaced by typographic quotes. Since it's in a literal bock, I'd rather keep the plain double quotes. Is this somehow possible?

Related

How to have ruby conditionally check if variables exist in a string?

So I have a string from a rendered template that looks like
"Dear {{user_name}},\r\n\r\nThank you for your purchase. If you have any questions, we are happy to help.\r\n\r\n\r\n{{company_name}}\r\n{{company_phone_number}}\r\n"
All those variables like {{user_name}} are optional and do not need to be included but I want to check that if they are, they have {{ in front of the variable name. I am using liquid to parse and render the template and couldn't get it to catch if the user only uses 1 (or no) opening brackets. I was only able to catch the proper number of closing brackets. So I wrote a method to check that if these variables exist, they have the correct opening brackets. It only works, however, if all those variables are found.
here is my method:
def validate_opening_brackets?(template)
text = %w(user_name company_name company_phone_number)
text.all? do |variable|
next unless template.include? variable
template.include? "{{#{variable}"
end
end
It works, but only if all variables are present. If, for example, the template created by the user does not include user_name, then it will return false. I've also done this loop using each, and creating a variable outside of the block that I assign false if the conditions are not met. I would really, however, like to get this to work using the all? method, as I can just return a boolean and it's cleaner.
If the question is about how to rewrite the all? block to make it return true if all present variable names have two brackets before them and false otherwise then you could use something like this:
def validate_opening_brackets?(template)
variables = %w(user_name company_name company_phone_number)
variables.all? do |variable|
!template.include?(variable) || template.include?("{{#{variable}")
end
end
TL;DR
There are multiple ways to do this, but the easiest way I can think of is to simply prefix/postfix a regular expression with the escaped characters used by Mustache/Liquid, and using alternation to check for each of your variable names within the template variable characters (e.g. double curly braces). You can then use String#scan and then return a Boolean from Enumerable#any? based on the contents of the Array returned by from #scan.
This works with your posted example, but there may certainly be other use cases where you need a more complex solution. YMMV.
Example Code
This solution escapes the leading and trailing { and } characters to avoid having them treated as special characters, and then interpolates the variable names with | for alternation. It returns a Boolean depending on whether templated variables are found.
def template_string_has_interpolations? str
var_names = %w[user_name company_name company_phone_number]
regexp = /\{\{#{var_names.join ?|}\}\}/
str.scan(regexp).any?
end
Tested Examples
template_string_has_interpolations? "Dear {{user_name}},\r\n\r\nThank you for your purchase. If you have any questions, we are happy to help.\r\n\r\n\r\n{{company_name}}\r\n{{company_phone_number}}\r\n"
#=> true
template_string_has_interpolations? "Dear Customer,\r\n\r\nThank you for your purchase. If you have any questions, we are happy to help.\r\n\r\n\r\nCompany, Inc.\r\n(555) 555-5555\r\n"
#=> false

How to add a namespace to existing xml file

I want to open this file and get all elements that start with us-gaap.
ftp://ftp.sec.gov/edgar/data/916789/0001558370-15-001143.txt
To get elements I tried like this:
str = '<html><body><us-gaap:foo>foo</us-gaap:foo></body></html>'
doc = Nokogiri::XML(File.read(str))
doc.xpath('//us-gaap:*')
Nokogiri::XML::XPath::SyntaxError: Undefined namespace prefix: //us-gaap:*
from /Users/ironsand/.rbenv/versions/2.2.2/lib/ruby/gems/2.2.0/gems/nokogiri-1.6.7.2/lib/nokogiri/xml/searchable.rb:165:in `evaluate'
doc.namespaces returns {}, so I think I have to add namespace us-gaap.
There are some questions about "adding namespace with Nokogiri", but it looks like about how to create a new XML document, not how to add a namespace to existing documents.
How can I add a namespace to existing document?
I know I can remove the namespace by Nokogiri::XML::Document#remove_namespaces!, but I don't want to use it because it removes also necesarry information.
You have asked an XY Problem. You think that the problem is that you need to add a missing namespace; the real problem is that the file you're trying to parse is not valid XML.
require 'nokogiri'
doc = Nokogiri.XML( IO.read('0001558370-15-001143.txt') )
doc.errors.length
#=> 5716
For example, the <ACCEPTANCE-DATETIME> 'element' opened on line 3 is never closed, and on line 16 there is a raw ampersand in the text:
STANDARD INDUSTRIAL CLASSIFICATION: ELECTRIC HOUSEWARES & FANS [3634]
which ought to be escaped as an entity.
However, the document has valid XML fragments within it! In particular, there is one XML document that defines xmlns:us-gaap namespace, from lines 27243-49312. Let's extract just that, using only the knowledge that the root element defines the namespace we want, and the assumptions that no element with the same name is nested within the document, and that the root element does not have an unescaped > character in any attribute. (These assumptions are valid for this file, but may not be valid for every XML file.)
txt = IO.read('0001558370-15-001143.txt')
gaap_finder = %r{(<(\w+) [^>]+xmlns:us-gaap=.+?</\2>)}m
txt.scan(gaap_finder) do |xml,_|
doc = Nokogiri.XML( xml )
gaaps = doc.xpath('//us-gaap:*')
p gaaps.length
#=> 569
end
The code above handles the case where there may be more than one XML document in the txt file, though in this case there is only one.
Decoded, the gaap_finder regex says this:
%r{...}m — this is a regular expression (that allows slashes in it, unescaped) with "multiline mode", where a period will match newline characters
(...) — capture everything we find
< — start with a literal "less-than" symbol
(\w+) — find one or more word characters (the tag name), and save them
— the word characters must be followed by a space (important to avoid capturing the <xsd:xbrl ...> element in this file)
[^>]+ — followed by one or more characters that is NOT a "greater-than" symbol (to ensure that we stay in the same element that we started in)
xmlns:us-gaap\s*= — followed by this literal namespace declaration (which may have whitespace separating it from the equals sign)
.+? — followed by anything (as little as possible)...
</\2> — ...up until you see a closing tag with the same name as what we captured for the name of the starting tag
Because of the way scan works when the regex has capturing groups, each result is a two-element array, where the first element is the entire captured XML and the second element is the name of the tag that we captured (which we "discard" by assigning it to the _ variable).
If you want to be less magic about your capturing, the text file format appears to always wrap each XML document in <XBRL>...</XBRL>. So, you could do this to process every XML file (there are seven, five of which do not happen to have any us-gaap namespaces):
txt = IO.read('0001558370-15-001143.txt')
xbrls = %r{(?<=<XBRL>).+?(?=</XBRL>)}m # find text inside <XBRL>…</XBRL>
txt.scan(xbrls) do |xml|
doc = Nokogiri.XML( xml )
if doc.namespaces["xmlns:us-gaap"]
gaaps = doc.xpath('//us-gaap:*')
p gaaps.length
end
end
#=> 569
#=> 0 (for the XML Schema document that defines the namespace)
I couldn't figure out how to update an existing doc with a new namespace, but since Nokogiri will recognize namespaces on the root element, and those namespaces are, syntactically, just attributes, you can update the document with a new namespace declaration, serialize the doc to a string, and re-parse it:
str = '<html><body><us-gaap:foo>foo</us-gaap:foo></body></html>'
doc_without_ns = Nokogiri::XML(str)
doc_without_ns.root['xmlns:us-gaap'] = 'http://your/actual/ns/here'
doc = Nokogiri::XML(doc_without_ns.to_xml)
doc.xpath("//us-gaap:*")
# Returns [#<Nokogiri::XML::Element:0x3ff375583f9c name="foo" namespace=#<Nokogiri::XML::Namespace:0x3ff375583f24 prefix="us-gaap" href="http://your/actual/ns/here"> children=[#<Nokogiri::XML::Text:0x3ff375583768 "foo">]>]

How can I configure the separator character used for :menuselection:?

I am using Sphinx to generate HTML documentation for my project. Under Inline Markup, the Sphinx documentation discusses :menuselection: for marking a sequence of menu selections using markup like:
:menuselection:`Start --> Programs`
This results in the following HTML:
<span class="menuselection">Start ‣ Programs</span>
i.e. the --> gets converted to the small triangle, which I've determined is U+2023, TRIANGULAR BULLET.
That's all well and good, but I'd like to use a different character instead of the triangle. I have searched the Sphinx package and the theme package (sphinx-bootstrap-theme) somewhat exhaustively for 'menuselection', the triangle character, and a few other things, but haven't turned up anything that does the substitution from --> to ‣ (nothing obvious to me, anyway). But something must be converting it between my .rst source and the html.
My question is: what, specifically is doing the conversion (sphinx core? HTML writer? Theme JS?)?
The conversion is done in the sphinx.roles.menusel_role() function. You can create your own version of this function with a different separator character and register it to be used.
Add the following to your project's conf.py:
from docutils import nodes, utils
from docutils.parsers.rst import roles
from sphinx.roles import _amp_re
def patched_menusel_role(typ, rawtext, text, lineno, inliner, options={}, content=[]):
text = utils.unescape(text)
if typ == 'menuselection':
text = text.replace('-->', u'\N{RIGHTWARDS ARROW}') # Here is the patch
spans = _amp_re.split(text)
node = nodes.emphasis(rawtext=rawtext)
for i, span in enumerate(spans):
span = span.replace('&&', '&')
if i == 0:
if len(span) > 0:
textnode = nodes.Text(span)
node += textnode
continue
accel_node = nodes.inline()
letter_node = nodes.Text(span[0])
accel_node += letter_node
accel_node['classes'].append('accelerator')
node += accel_node
textnode = nodes.Text(span[1:])
node += textnode
node['classes'].append(typ)
return [node], []
# Use 'patched_menusel_role' function for processing the 'menuselection' role
roles.register_local_role("menuselection", patched_menusel_role)
When building html, make sure to make clean first so that the updated conf.py is re-parsed with the patch.

Parse a string with multiple XML-like tags using Ruby

I have a string which looks like the following:
string = " <SET-TOPIC>INITIATE</SET-TOPIC>
<SETPROFILE>
<PROFILE-KEY>predicates_live</PROFILE-KEY>
<PROFILE-VALUE>yes</PROFILE-VALUE>
</SETPROFILE>
<think>
<set><name>first_time_initiate</name>yes</set>
</think>
<SETPROFILE>
<PROFILE-KEY>first_time_initiate</PROFILE-KEY>
<PROFILE-VALUE>YES</PROFILE-VALUE>
</SETPROFILE>"
My objective is to be able to read out each top level that is in caps with the parse. I use a case statement to evaluate what is the top level key, such as <SETPROFILE> but there can be lots of different values, and then run a method that does different things with the contnts of the tag.
What this means is I need to be able to know very easily:
top_level_keys = ['SET-TOPIC', 'SET-PROFILE', 'SET-PROFILE']
when I pass in the key know the full value
parsed[0].value = {:PROFILE-KEY => predicates_live, :PROFILE-VALUE => yes}
parsed[0].key = ['SET-TOPIC']
I currently parse the whole string as follows:
doc = Nokogiri::XML::DocumentFragment.parse(string)
parsed = doc.search('*').each_with_object({}){ |n, h|
h[n.name] = n.text
}
As a result, I only parse and know of the second tag. The values from the first tag do not show up in the parsed variable.
I have control over what the tags are, if that helps.
But I need to be able to parse and know the contents of both tag as a result of the parse because I need to apply a method for each instance of the node.
Note: the string also contains just regular text, both before, in between, and after the XML-like tags.
It depends on what you are going to achieve. The problem is that you are overriding hash keys by new values. The easiest way to collect values is to store them in array:
parsed = doc.search('*').each_with_object({}) do |n, h|
# h[n.name] = n.text :: removed because it overrides values
(h[n.name] ||= []) << n.text
end

Substitution in a file name with reStructuredText (Sphinx)?

I want to create several files from a single template, which differ only by a variable name. For example :
(file1.rst):
.. |variable| replace:: 1
.. include template.rst
(template.rst) :
Variable |variable|
=====================
Image
-------
.. image:: ./images/|variable|-image.png
where of course I have an image called "./images/1-image.png". The substitution of "|variable|" by "1" works well in the title, but not in the image file name, and at compilation I get :
WARNING: image file not readable: ./images/|variable|-image.png
How can I get reST to make the substitution in the variable name too? (if this changes anything, am using Sphinx).
There are two problems here: a substitution problem, and a parsing order problem.
For the first problem, the substitution reference |variable| cannot have adjacent characters (besides whitespace or maybe _ for hyperlinking) or else it won't parse as a substitution reference, so you need to escape it:
./images/\ |variable|\ -image.png
However, the second problem is waiting around the corner. While I'm not certain of the details, it seems reST is unable to parse substitutions inside other directives. I think it first parses the image directive, which puts it in the document tree and thus out of reach of the substitution mechanism. Similarly, I don't think it's possible to use a substitution to insert content intended to be parsed (e.g. .. |img1| replace::`.. image:: images/1-image.png`). This is all speculative based on some tests and my incomplete comprehension of the official documentation, so someone more knowledgeable can correct what I've said here.
I think you're aware of the actual image substitution directive (as opposed to text substitution), but I don't think it attains the generality you're aiming for (you'll still need a separate directive for the image as from the |variable|), but in any case it looks like this:
.. |img1| image:: images/1-image.png
Since you're using Sphinx, you can try creating your own directive extension (see this answer for information), but it won't solve the substitutions-inside-markup problem.
You have to create a custom directive in this case as Sphinx doesn't allow you to substitute image paths. You can change Sphinx figure directive as follows and use it instead of the image directive.
from typing import Any, Dict, List, Tuple
from typing import cast
from docutils import nodes
from docutils.nodes import Node, make_id, system_message
from docutils.parsers.rst import directives
from docutils.parsers.rst.directives import images, html, tables
from sphinx import addnodes
from sphinx.directives import optional_int
from sphinx.domains.math import MathDomain
from sphinx.util.docutils import SphinxDirective
from sphinx.util.nodes import set_source_info
if False:
# For type annotation
from sphinx.application import Sphinx
class CustomFigure(images.Figure):
"""The figure directive which applies `:name:` option to the figure node
instead of the image node.
"""
def run(self) -> List[Node]:
name = self.options.pop('name', None)
path = self.arguments[0] #path = ./images/variable-image.png
#replace 'variable' from th.e given value
self.argument[0] = path.replace("variable", "string substitution")
result = super().run()
if len(result) == 2 or isinstance(result[0], nodes.system_message):
return result
assert len(result) == 1
figure_node = cast(nodes.figure, result[0])
if name:
# set ``name`` to figure_node if given
self.options['name'] = name
self.add_name(figure_node)
# copy lineno from image node
if figure_node.line is None and len(figure_node) == 2:
caption = cast(nodes.caption, figure_node[1])
figure_node.line = caption.line
return [figure_node]
def setup(app: "Sphinx") -> Dict[str, Any]:
directives.register_directive('figure', Figure)
return {
'version': 'builtin',
'parallel_read_safe': True,
'parallel_write_safe': True,
}
You can add this CustomFigure.py directive in the conf.py of the project and use the customfigure directive across Sphinx project instead of the Image directive. Refer http://www.sphinx-doc.org/en/master/usage/extensions/index.html to add a custom directive to your Sphinx project.

Resources