HtmlAgility - dealing with in-between html - html-agility-pack

Lets say I have a website "example.com". I call the following line
// Pass html content of the site.com to a string
string htmlCode = client.DownloadString("http://example.com");
HtmlDocument doc = new HtmlDocument();
doc.LoadHtml(WebUtility.HtmlDecode(htmlCode));
For some websites I have the exact html I want. But some sites return a html consist of forms, or with an empty body and some kind of scripts.
Example for script one:
<!DOCTYPE html>
<html><head>
<meta http-equiv="Pragma" content="no-cache">
<meta http-equiv="Expires" content="-1">
<meta http-equiv="CacheControl" content="no-cache">
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<link rel="shortcut icon" href="data:;base64,iVBORw0KGgo=">
<script>
(function(){
var securemsg;
var dosl7_common;
window["bobcmn"] = "111110111110102000000022000000052000000002a4b927ad200000096300000000300000000300000006/TSPD/300000008TSPD_101300000005https200000000200000000";
window.hQv=!!window.hQv;try{(function(){try{var __,i_,j_=1,O_=1,z_=1,s_=1,S_=1,Ji=1,li=1,oi=1,si=1;for(var Si=0;Si<i_;++Si)j_+=2,O_+=2,z_+=2,s_+=2,S_+=2,Ji+=2,li+=2,oi+=2,si+=3;__=j_+O_+z_+s_+S_+Ji+li+oi+si;window.SL===__&&(window.SL=++__)}catch(JI){window.SL=__}var OI=!0;function SI(_){_&&(OI=!1,document.cookie="brav=ad");return OI}function _j(){}SI(window[_j.name]===_j);SI("function"!==typeof ie9rgb4);SI(/\x3c/.test(function(){return"\x3c"})&!/x3d/.test(function(){return"'x3'+'d';"}));
var ij=window.attachEvent||/mobi/i.test(window["\x6e\x61vi\x67a\x74\x6f\x72"]["\x75\x73e\x72A\x67\x65\x6et"]),Ij=+new Date+6E5,Jj,lj,Oj=setTimeout,zj=ij?3E4:6E3;function Zj(){if(!document.querySelector)return!0;var _=+new Date,l=_>Ij;if(l)return SI(!1);l=lj&&Jj+zj<_;l=SI(l);Jj=_;lj||(lj=!0,Oj(function(){lj=!1},1));return l}Zj();var sj=[17795081,27611931586,1558153217];
function Sj(_){_="string"===typeof _?_:_.toString(36);var l=window[_];if(!l.toString)return;var O=""+l;window[_]=function(_,O){lj=!1;return l(_,O)};window[_].toString=function(){return O}}for(var si=0;si<sj.length;++si)Sj(sj[si]);SI(!1!==window.hQv);
(function iJ(){if(!Zj())return;var l=!1;function O(l){for(var z=0;l--;)z+=Z(document.documentElement,null);return z}function Z(l,z){var O="vi";z=z||new s;return o_(l,function(l){l.setAttribute("data-"+O,z._s());return Z(l,z)},null)}function s(){this.Lz=1;this.Jz=0;this.il=this.Lz;this.c=null;this._s=function(){this.c=this.Jz+this.il;if(!isFinite(this.c))return this.reset(),this._s();this.Jz=this.il;this.il=this.c;this.c=null;return this.il};this.reset=function(){this.Lz++;this.Jz=0;this.il=this.Lz}}
var S=!1;function z(l,z){if(!Zj())return;var O=document.createElement(l);z=z||document.body;z.appendChild(O);O&&O.style&&(O.style.display="none");Zj()}function J_(z,O){if(!Zj())return;O=O||z;var Z="|";function s(l){l=l.split(Z);var z=[];for(var O=0;O<l.length;++O){var S="",I_=l[O].split(",");for(var J_=0;J_<I_.length;++J_)S+=I_[J_][J_];z.push(S)}return z}var J_=0,o_="datalist,details,embed,figure,hrimg,strong,article,formaddress|audio,blockquote,area,source,input|canvas,form,link,tbase,option,details,article";
o_.split(Z);o_=s(o_);o_=new RegExp(o_.join(Z),"g");while(o_.exec(z))o_=new RegExp((""+new Date)[8],"g"),l&&(S=Zj()),++J_;return Zj()?O(J_&&1):void 0}function o_(l,O,Z){if(!Zj())return;(Z=Z||S)&&z("div",l);l=l.children;var s=0;for(var J_ in l){Z=l[J_];try{Z instanceof HTMLElement&&(O(Z),++s)}catch(o_){}}return Zj()?s:void 0}J_(iJ,O);Zj()})();var IJ=82;window.oz={zz:"0820fdace1017800ebdf62cbc35cbeca5d8b435652ee3d253bb2e03195f77060a34ecc0424666f18abca1759ee2fa744800dfad86d4269514242d4fceed9d9c70b54e28c9b8c3fbf20a4971c6cf7cf3e60654d34ea06fc0747a30d8d8807f58873200a982d1d45fb8ed817474e167ab24b6ec97b833fc5141c0ef332e22dc753"};function I(_){return 396>_}
function J(_){var l=arguments.length,O=[];for(var Z=1;Z<l;++Z)O.push(arguments[Z]-_);return String.fromCharCode.apply(String,O)}function L(_,l){_+=l;return _.toString(36)}(function JJ(l){return l?0:JJ(l)*JJ(l)})(OI);})();}catch(x){document.cookie='brav=oex'+x;}finally{ie9rgb4=void(0);};function ie9rgb4(a,b){return a>>b>>0};
})();
</script>
<script type="text/javascript" src="/TSPD/084fc6184bab20009b43f88181dfc281050b986fbf5cd6e7067eeb760574cf33392dd93acd61a34b?type=8"></script>
<script>
(function(){
var securemsg;
var dosl7_common;
window["blobfp"] = "1111111110112000003e82ff5ac71e30000004a91d2b9750979230f005996dcd100001c20be2e63e7a47a6a80ea7aac3f26b85092554439d9300000020http://re.security.f5aas.com/re/";
})();
</script>
<script type="text/javascript" src="/TSPD/084fc6184bab20009b43f88181dfc281050b986fbf5cd6e7067eeb760574cf33392dd93acd61a34b?type=11"></script>
<noscript>Please enable JavaScript to view the page content.</noscript>
</head><body>
</body></html>
How can I deal with this "ecnryption" system and get the final html I want that is on "example.com", that you see when you check the source on the browser?

Do you have a F5 Server in front of your webserver? Because I have a similar situation, where that Javascript gets injected in my code (which leads to problems).

This was due to page loading with an AJAX call. It could be solved by using PhantomJS web driver.

Related

Refused to frame '' because it violates the following Content Security Policy directive: "frame-src *" Trying to set for tel: explicity here

I'm currently trying to build a click to dial link in the browser as an Outlook Addin. I'm getting the error:
Refused to frame '' because it violates the following Content Security Policy directive: "frame-src *". Note that '*' matches only URLs with network schemes ('http', 'https', 'ws', 'wss'), or URLs whose scheme matches `self`'s scheme. tel:' must be added explicitely. [https://localhost:44371/]
I've set the meta tags a bunch of different ways trying to explicitly state the tel scheme that they mention. For instance:
<meta http-equiv="Content-Security-Policy" content="frame-src 'self' tel:">
I've tried about 20 different variations on this. I've also noticed that many people are saying something about changing the HTTP response headers, but I'm not sure exactly how to do this or even why it would be needed.
I'm working on Visual Studio using a template from their own program. Because I'm testing this out on my own computer, I've also tried to whitelist my own localhost. Still nothing.
Here is the html:
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8" />
<meta http-equiv="Content-Security-Policy" content="frame-src 'self' tel:">
<title>standard_item_properties</title>
<script src="https://appsforoffice.microsoft.com/lib/1/hosted/office.js" type="text/javascript"></script>
<link rel="stylesheet" type="text/css" media="all" href="default_entities.css" />
<script type="text/javascript" src="MicrosoftAjax.js"></script>
<script src="CallFunctionFile.js" type="text/javascript"></script>
<!-- Use the CDN reference to Office.js. -->
<script type="text/javascript" src="default_entities.js"></script>
</head>
<body>
<!-- NOTE: The body is empty on purpose. Since this is invoked via a button, there is no UI to render. -->
<div id="container">
<div><a id="tel-link">Make Call from Phone</a></div>
</div>
</body>
</html>
and here is the javascript:
// Global variables
let item;
let myEntities;
// The initialize function is required for all add-ins.
Office.initialize = function () {
const mailbox = Office.context.mailbox;
// Obtains the current item.
item = mailbox.item;
// Reads all instances of supported entities from the subject
// and body of the current item.
myEntities = item.getEntities();
JSON.stringify(myEntities.phoneNumbers[0].originalPhoneString));
// Checks for the DOM to load using the jQuery ready function.
window.addEventListener('DOMContentLoaded', (event) => {
// After the DOM is loaded, app-specific code can run.
});
let a = document.getElementById("tel-link");
a.href = "tel:" + encodeURIComponent(myEntities.phoneNumbers[0].originalPhoneString);
}

Foundation 6 Reveal ajax not working

I'm trying to create a reval Modal Foundation 6 and Ajac, but does not work.
I followed the official guide
Page html
<!DOCTYPE html><html lang="it">
<head>
<title>Avvisi</title>
<link href="http://ci.dev/assets/css/foundation.min.css" rel="stylesheet" type="text/css" />
<script src="http://ci.dev/assets/js/vendor/jquery.min.js" type="text/javascript">
</script><script src="http://ci.dev/assets/js/foundation.min.js" type="text/javascript"></script><script src="http://ci.dev/assets/js/foundation.reveal.js" type="text/javascript"></script><link href="http://ci.dev/assets/css/reveal.css" rel="stylesheet" type="text/css" /><link href="http://ci.dev/assets/css/motion-ui.css" rel="stylesheet" type="text/css" /><script src="http://ci.dev/assets/js/vendor/what-input.min.js" type="text/javascript"></script><script src="http://ci.dev/assets/js/vendor/motion-ui.js" type="text/javascript"></script></head>
<body>
<h2>Title</h2><p>Message</p>
<span><a href='http://ci.dev/AR-PIB/warn/edit_v/160/284'>Edit</a></span><span id='160|284'><a class='big-link' href='#' data-reveal-id='myModal' data-animation='fade'>Delete</a></span></div><hr/>
<script type="text/javascript">jQuery(document).ready(function() {
jQuery('.big-link').click(function(){
var dati = jQuery(this).parent().attr('id');
var $modal = $('#modal');
alert($modal)
$.ajax({'http://ci.dev/AR-PIB/Warn/delete'})
.done(function(resp){
alert(resp);
jQuery('body').append(resp);
$modal.html(resp.html).foundation('open');
conaole.log(resp);
});
});
});
</script>
<script src="http://ci.dev/assets/js/app.js" type="text/javascript"></script></body></html>
I'm not sure why it's not working exactly as I can't run your code (the links http://ci.dev/... seem to be local to your machine), but it may be one of the following:
You have an close div but no open
You're attempting to use the Foundation JS ($modal.html(resp.html).foundation('open'); etc.) before you have initialised Foundation (if in app.js, per your comment, this is after the Reveal code)
foundation.min.js (unless you have generated a slimmed down version) includes both the motion UI and the reveal so you don't need to add each JS file
You're missing a ; after alert($modal)
I think you mean console.log(resp); rather than conaole.log(resp);
You reference the element $('#modal') but there is no element on the page with the id modal (your link also has data-reveal-id='myModal')
According to the Foundation Docs:
http://foundation.zurb.com/sites/docs/javascript.html#adding-content-to-plugins
Just add a Foundation.reInit('abide'); and it should work right away!
I found a solution that works with Zurb Foundation 6.3.2, as explain in https://foundation.zurb.com/sites/docs/javascript.html#programmatic-use
in the ajax done function :
var $modalContent = new Foundation.Reveal($('#modalContent'));
$modalContent.open();

Polymer iron-ajax data binding example not working

I'm having problems with iron-ajax and data binding in Polymer 1.0.2. Not even a slightly changed example from the Polymer documentation is working.
Here is the code with my changes:
<!doctype html>
<html>
<head>
<meta charset="utf-8">
<script src="../../../bower_components/webcomponentsjs/webcomponents-lite.js"></script>
<link rel="import" href="../../../bower_components/polymer/polymer.html">
<link rel="import" href="../../../bower_components/iron-ajax/iron-ajax.html">
</head>
<body>
<template is="dom-bind">
<iron-ajax
auto
url="http://jsonplaceholder.typicode.com/posts/"
lastResponse="{{data}}"
handleAs="json">
</iron-ajax>
<template is="dom-repeat" items="{{data}}">
<div><span>{{item.id}}</span></div>
</template>
</template>
<script>
(function (document) {
'use strict';
var app = document.querySelector('#app');
window.addEventListener('WebComponentsReady', function() {
var ironAjax = document.querySelector('iron-ajax');
ironAjax.addEventListener('response', function() {
console.log(ironAjax.lastResponse[0].id);
});
ironAjax.generateRequest();
});
})(document);
</script>
</body>
</html>
All I changed was entering a URL to get a real JSON response and setting the auto and handleAs properties. I also added a small script with a listener for the response event. The listener is working fine and handles the response, but the spans in the dom-repeat template aren't rendered.
I'm using Polymer 1.0.2 and iron-elements 1.0.0
It seems the documentation you is missing a - character in the lastresponse attribute of the example.
You must change lastResponse to last-response.
Look at this example from the iron-ajax github page.
when you use a attribute on a element, you have to convert the camelcase sentence to dashes sentence, I mean:
lastResponse is maps to last-response
Property name to attribute name mapping

Read/write to Parse Core db from Google Apps Script

I'm just starting to use Parse Core (as Google'e ScriptDB is being decommissioned soon) and am having some trouble.
So I'm able to get Parse Core db to read/write using just a standard HTML page as shown below:
<!doctype html>
<head>
<meta charset="utf-8">
<title>My Parse App</title>
<meta name="description" content="My Parse App">
<meta name="viewport" content="width=device-width">
<link rel="stylesheet" href="css/reset.css">
<link rel="stylesheet" href="css/styles.css">
<script type="text/javascript" src="http://ajax.googleapis.com/ajax/libs/jquery/1.7.2/jquery.min.js"></script>
<script type="text/javascript" src="http://www.parsecdn.com/js/parse-1.2.18.min.js"></script>
</head>
<body>
<div id="main">
<h1>You're ready to use Parse!</h1>
<p>Read the documentation and start building your JavaScript app:</p>
<ul>
<li>Parse JavaScript Guide</li>
<li>Parse JavaScript API Documentation</li>
</ul>
<div style="display:none" class="error">
Looks like there was a problem saving the test object. Make sure you've set your application ID and javascript key correctly in the call to <code>Parse.initialize</code> in this file.
</div>
<div style="display:none" class="success">
<p>We've also just created your first object using the following code:</p>
<code>
var TestObject = Parse.Object.extend("TestObject");<br/>
var testObject = new TestObject();<br/>
testObject.save({foo: "bar"});
</code>
</div>
</div>
<script type="text/javascript">
Parse.initialize("PyMFUxyBxR8IDgndjZ378CeEXH2c6WLK1wK2JHYX", "IgiMfiuy3LFjzH0ehmyf5Rkti8AmVtwcGqc6nttN");
var TestObject = Parse.Object.extend("TestObject");
var testObject = new TestObject();
testObject.save({foo: "bar"}, {
success: function(object) {
$(".success").show();
},
error: function(model, error) {
$(".error").show();
}
});
</script>
</body>
</html>
However, when I try to serve that up using the HtmlService shown below, I get no response from Parse. Parse Core.html basically has all of the code I have above ( only thing I changed was to remove the css calls).
function doGet() {
var htmlPage = HtmlService.createTemplateFromFile('Parse Core.html')
.evaluate()
.setSandboxMode(HtmlService.SandboxMode.NATIVE)
.setTitle('Parse Core Test');
return htmlPage;
}
Link to ParseDb Library for Apps Script
Here is the key to add the library: MxhsVzdWH6ZQMWWeAA9tObPxhMjh3Sh48
Install that library and it allows you to use most of the same methods that were used by ScriptDb. As far as saving and querying go they almost identical. Make sure to read the Library's notes, how to add the applicationId and restApiKey. It is a little different that you can silo data by classes which must be defined in the call to Parse.
Bruce here is leading the way on database connection for Apps Script, he has plenty of documentation on using Parse.com, and also his own DbConncection Drive that would allow you to use a number of back-end systems.
Excel Liberation - Bruce's Site.

Cannot Load the HTML through HtmlAgilityPack

I try to parse HTML using HtmlAgilityPack using simple doc.load method by passing the URL, but it comes with the following result how can I resolve this issue?
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<meta http-equiv="Content-Script-Type" content="text/javascript">
<script type="text/javascript">
function setCookie(c_name, value, expiredays) {
var exdate = new Date();
exdate.setDate(exdate.getDate()+expiredays);
document.cookie = c_name + "=" + escape(value) + ((expiredays==null) ? "" : ";
expires=" + exdate.toGMTString()) + ";path=/"; }
function getHostUri()
{ var loc = document.location; return loc.toString(); }
setCookie('YPF8827340282Jdskjhfiw_928937459182JAX666', '202.142.170.42', 10);
setCookie('DOAReferrer', document.referrer, 10); location.href = getHostUri();
</script>
</head>
<body>
<noscript>This site requires JavaScript and Cookies to be enabled. Please change your browser settings or upgrade your browser.</noscript>
</body></html>
This site requires JavaScript and Cookies to be enabled.
Please change your browser settings or upgrade your
browser.
This Message says it all, the side needs javascript to be loaded, and HtmlAgilityPack is no JavascriptEngine!
The Load Method of the HtmlDocument can not interpret and execute Javascript-Code it´s just a simple "Download"-Function for static HTML-Sites.
What you could try to do is, with Firebug (or something else) check which HttpRequest are made to get the content, and this Requests you have to recreate in C# to get the HTML you want!
Here are some similar Questions:
Running Scripts in HtmlAgilityPack
C# - Get JavaScript variable value using HTMLAgilityPack
Calling javascript function from HtmlAgilityPack

Resources