Download latest jsoup jar file (Download Link).
Compile code with appropriate class path value, like
javac -cp "C:\jsoup-1.7.1.jar" "TestClass.java"
java -cp "C:\jsoup-1.7.1.jar" TestClass
Simple Example using Jsoup to connect to server using login credentials and then retrieving specific page.
[sourcecode language="java"]
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.Connection;
import java.io.IOException;
import org.jsoup.Connection.Method;
import java.util.HashMap;
import java.util.Map;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
public class TestClass
{
public static void main(String args[]) throws IOException
{
Document doc = Jsoup.connect("<URL>").get();
Elements viewState = doc.select("input[name=__VIEWSTATE");
Elements eventValidation = doc.select("input[name=__EVENTVALIDATION]");
Map<String,String> allFields = new HashMap<String,String>();
allFields.put("__VIEWSTATE", viewState.val());
allFields.put("__EVENTVALIDATION", eventValidation.val());
allFields.put("txtLogin", "<USERNAME>");
allFields.put("txtPassword", "<PASSWORD>");
allFields.put("butSubmit", "Sign In");
System.out.println(allFields);
Connection.Response res = Jsoup.connect("<URL2>")
.userAgent("Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.21 (KHTML, like Gecko) Chrome/19.0.1042.0 Safari/535.21")
.data(allFields)
.method(Method.POST).
execute();
String sessionId = res.cookie("<COOKIENAME>");
System.out.println(sessionId);
Document doc2 = Jsoup.connect("URL3")
.cookie("ASP.NET_SessionId", sessionId)
.timeout(0)
.get();
System.out.println(doc2.html());
}
}
[/sourcecode]
Wednesday, January 30, 2013
Web scrapping using Jsoup
Labels:
java,
java library,
jsoup,
Text Mining,
web scrapping
HTML output in Oracle PL SQL
Requirement : Generating HTML output using Oracle PL SQL
Solution :
Thomas Kyte shared below piece of code on his blog for generating html output from refcursor.
[sourcecode language="sql"]
CREATE OR REPLACE FUNCTION apps.fncrefcursor2html (rf sys_refcursor)
RETURN CLOB
IS
lretval CLOB;
lhtmloutput XMLTYPE;
lxsl CLOB;
lxmldata XMLTYPE;
lcontext DBMS_XMLGEN.ctxhandle;
BEGIN
-- get a handle on the ref cursor --
lcontext := DBMS_XMLGEN.newcontext (rf);
-- setNullHandling to 1 (or 2) to allow null columns to be displayed --
DBMS_XMLGEN.setnullhandling (lcontext, 1);
-- create XML from ref cursor --
lxmldata := DBMS_XMLGEN.getxmltype (lcontext, DBMS_XMLGEN.NONE);
IF lxmldata IS NOT NULL
THEN
-- this is a generic XSL for Oracle's default XML row and rowset tags --
-- " " is a non-breaking space --
lxsl := lxsl || q'[<?xml version="1.0" encoding="ISO-8859-1"?>]';
lxsl :=
lxsl
|| q'[<xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform">]';
lxsl := lxsl || q'[ <xsl:output method="html"/>]';
lxsl := lxsl || q'[ <xsl:template match="/">]';
lxsl := lxsl || q'[ <table >]';
lxsl := lxsl || q'[ <tr >]';
lxsl := lxsl || q'[ <xsl:for-each select="/ROWSET/ROW[1]/*">]';
lxsl := lxsl || q'[ <th><xsl:value-of select="name()"/></th>]';
lxsl := lxsl || q'[ </xsl:for-each>]';
lxsl := lxsl || q'[ </tr>]';
lxsl := lxsl || q'[ <xsl:for-each select="/ROWSET/*">]';
lxsl := lxsl || q'[ <tr>]';
lxsl := lxsl || q'[ <xsl:for-each select="./*">]';
lxsl := lxsl || q'[ <td><xsl:value-of select="text()"/> </td>]';
lxsl := lxsl || q'[ </xsl:for-each>]';
lxsl := lxsl || q'[ </tr>]';
lxsl := lxsl || q'[ </xsl:for-each>]';
lxsl := lxsl || q'[ </table>]';
lxsl := lxsl || q'[ </xsl:template>]';
lxsl := lxsl || q'[</xsl:stylesheet>]';
-- XSL transformation to convert XML to HTML --
lhtmloutput := lxmldata.transform (XMLTYPE (lxsl));
-- convert XMLType to Clob --
lretval := lhtmloutput.getclobval ();
RETURN lretval;
ELSE
RETURN NULL;
END IF;
END fncrefcursor2html;
[/sourcecode]
Simple package using above function to write table output to Oracle Apps output file.
[sourcecode language="sql"]
CREATE OR REPLACE PACKAGE apps.write_html_pkg
AS
PROCEDURE write_clob (l_clob_data CLOB);
PROCEDURE write_html_prc (o_err_msg OUT VARCHAR2, o_err_code OUT NUMBER);
END;
CREATE OR REPLACE PACKAGE BODY apps.write_html_pkg
AS
PROCEDURE write_clob (l_clob_data CLOB)
AS
l_char_data VARCHAR2 (32767);
v_clob_len NUMBER;
v_chunk_length NUMBER;
v_iterations NUMBER;
v_clob_temp CLOB;
BEGIN
IF l_clob_data IS NOT NULL
THEN
v_chunk_length := 32767;
v_clob_len := DBMS_LOB.getlength (l_clob_data);
v_iterations := CEIL (v_clob_len / v_chunk_length);
-- v_chunk_length is 32767 and v_clob_length is length of the XML data stored inthe clob variable
FOR i IN 0 .. v_iterations
LOOP
v_clob_temp :=
DBMS_LOB.SUBSTR (l_clob_data,
v_chunk_length,
(i * v_chunk_length) + 1
);
fnd_file.put_line (fnd_file.output, v_clob_temp);
END LOOP;
ELSE
NULL;
END IF;
END;
PROCEDURE OUT (p_text VARCHAR2)
AS
BEGIN
fnd_file.put_line (fnd_file.output, p_text);
END;
PROCEDURE write_html_prc (o_err_msg OUT VARCHAR2, o_err_code OUT NUMBER)
IS
l_cursor sys_refcursor;
l_clob_data CLOB;
l_style VARCHAR2 (32767);
BEGIN</pre>
l_style :=
q'[<style type="text/css">
table
{
border-collapse:collapse;
}
table td, table th {
border:1px solid grey;
border:1px solid #98bf21;
padding:3px 7px 2px 7px;
}
table th
{
text-align:left;
padding-top:5px;
padding-bottom:4px;
background-color:#A7C942;
color:#fff;
}
</style>]';
OUT (q'[ <html>]');
OUT (q'[ <head>]');
OUT (l_style);
OUT (q'[ </head>]');
OUT (q'[ <body>]');
<pre>
--------------------------------write table-----------------------------
OPEN l_cursor FOR
SELECT *
FROM scott.emp;
l_clob_data := fncrefcursor2html (l_cursor);
write_clob (l_clob_data);
--------------------------------END :write table-----------------------------
OUT (q'[ </body>]');
OUT (q'[ </html>]');
END;
END write_html_pkg;
/
[/sourcecode]
Output Screenshot
Solution :
Thomas Kyte shared below piece of code on his blog for generating html output from refcursor.
[sourcecode language="sql"]
CREATE OR REPLACE FUNCTION apps.fncrefcursor2html (rf sys_refcursor)
RETURN CLOB
IS
lretval CLOB;
lhtmloutput XMLTYPE;
lxsl CLOB;
lxmldata XMLTYPE;
lcontext DBMS_XMLGEN.ctxhandle;
BEGIN
-- get a handle on the ref cursor --
lcontext := DBMS_XMLGEN.newcontext (rf);
-- setNullHandling to 1 (or 2) to allow null columns to be displayed --
DBMS_XMLGEN.setnullhandling (lcontext, 1);
-- create XML from ref cursor --
lxmldata := DBMS_XMLGEN.getxmltype (lcontext, DBMS_XMLGEN.NONE);
IF lxmldata IS NOT NULL
THEN
-- this is a generic XSL for Oracle's default XML row and rowset tags --
-- " " is a non-breaking space --
lxsl := lxsl || q'[<?xml version="1.0" encoding="ISO-8859-1"?>]';
lxsl :=
lxsl
|| q'[<xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform">]';
lxsl := lxsl || q'[ <xsl:output method="html"/>]';
lxsl := lxsl || q'[ <xsl:template match="/">]';
lxsl := lxsl || q'[ <table >]';
lxsl := lxsl || q'[ <tr >]';
lxsl := lxsl || q'[ <xsl:for-each select="/ROWSET/ROW[1]/*">]';
lxsl := lxsl || q'[ <th><xsl:value-of select="name()"/></th>]';
lxsl := lxsl || q'[ </xsl:for-each>]';
lxsl := lxsl || q'[ </tr>]';
lxsl := lxsl || q'[ <xsl:for-each select="/ROWSET/*">]';
lxsl := lxsl || q'[ <tr>]';
lxsl := lxsl || q'[ <xsl:for-each select="./*">]';
lxsl := lxsl || q'[ <td><xsl:value-of select="text()"/> </td>]';
lxsl := lxsl || q'[ </xsl:for-each>]';
lxsl := lxsl || q'[ </tr>]';
lxsl := lxsl || q'[ </xsl:for-each>]';
lxsl := lxsl || q'[ </table>]';
lxsl := lxsl || q'[ </xsl:template>]';
lxsl := lxsl || q'[</xsl:stylesheet>]';
-- XSL transformation to convert XML to HTML --
lhtmloutput := lxmldata.transform (XMLTYPE (lxsl));
-- convert XMLType to Clob --
lretval := lhtmloutput.getclobval ();
RETURN lretval;
ELSE
RETURN NULL;
END IF;
END fncrefcursor2html;
[/sourcecode]
Simple package using above function to write table output to Oracle Apps output file.
[sourcecode language="sql"]
CREATE OR REPLACE PACKAGE apps.write_html_pkg
AS
PROCEDURE write_clob (l_clob_data CLOB);
PROCEDURE write_html_prc (o_err_msg OUT VARCHAR2, o_err_code OUT NUMBER);
END;
CREATE OR REPLACE PACKAGE BODY apps.write_html_pkg
AS
PROCEDURE write_clob (l_clob_data CLOB)
AS
l_char_data VARCHAR2 (32767);
v_clob_len NUMBER;
v_chunk_length NUMBER;
v_iterations NUMBER;
v_clob_temp CLOB;
BEGIN
IF l_clob_data IS NOT NULL
THEN
v_chunk_length := 32767;
v_clob_len := DBMS_LOB.getlength (l_clob_data);
v_iterations := CEIL (v_clob_len / v_chunk_length);
-- v_chunk_length is 32767 and v_clob_length is length of the XML data stored inthe clob variable
FOR i IN 0 .. v_iterations
LOOP
v_clob_temp :=
DBMS_LOB.SUBSTR (l_clob_data,
v_chunk_length,
(i * v_chunk_length) + 1
);
fnd_file.put_line (fnd_file.output, v_clob_temp);
END LOOP;
ELSE
NULL;
END IF;
END;
PROCEDURE OUT (p_text VARCHAR2)
AS
BEGIN
fnd_file.put_line (fnd_file.output, p_text);
END;
PROCEDURE write_html_prc (o_err_msg OUT VARCHAR2, o_err_code OUT NUMBER)
IS
l_cursor sys_refcursor;
l_clob_data CLOB;
l_style VARCHAR2 (32767);
BEGIN</pre>
l_style :=
q'[<style type="text/css">
table
{
border-collapse:collapse;
}
table td, table th {
border:1px solid grey;
border:1px solid #98bf21;
padding:3px 7px 2px 7px;
}
table th
{
text-align:left;
padding-top:5px;
padding-bottom:4px;
background-color:#A7C942;
color:#fff;
}
</style>]';
OUT (q'[ <html>]');
OUT (q'[ <head>]');
OUT (l_style);
OUT (q'[ </head>]');
OUT (q'[ <body>]');
<pre>
--------------------------------write table-----------------------------
OPEN l_cursor FOR
SELECT *
FROM scott.emp;
l_clob_data := fncrefcursor2html (l_cursor);
write_clob (l_clob_data);
--------------------------------END :write table-----------------------------
OUT (q'[ </body>]');
OUT (q'[ </html>]');
END;
END write_html_pkg;
/
[/sourcecode]
Output Screenshot
Subscribe to:
Posts (Atom)