Wednesday, January 30, 2013

Web scrapping using Jsoup

Download latest jsoup jar file (Download Link).

Compile code with appropriate class path value, like

javac -cp "C:\jsoup-1.7.1.jar"  "TestClass.java"

java  -cp  "C:\jsoup-1.7.1.jar"  TestClass

Simple Example using Jsoup to connect to server using login credentials and then retrieving specific page.

[sourcecode language="java"]

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.Connection;
import java.io.IOException;
import org.jsoup.Connection.Method;
import java.util.HashMap;
import java.util.Map;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;

public class TestClass
{
public static void main(String args[]) throws IOException
{

Document doc = Jsoup.connect("<URL>").get();

Elements viewState = doc.select("input[name=__VIEWSTATE");
Elements eventValidation = doc.select("input[name=__EVENTVALIDATION]");

Map<String,String> allFields = new HashMap<String,String>();
allFields.put("__VIEWSTATE", viewState.val());
allFields.put("__EVENTVALIDATION", eventValidation.val());
allFields.put("txtLogin", "<USERNAME>");
allFields.put("txtPassword",   "<PASSWORD>");
allFields.put("butSubmit",   "Sign In");

System.out.println(allFields);

Connection.Response res = Jsoup.connect("<URL2>")
.userAgent("Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/535.21 (KHTML, like Gecko) Chrome/19.0.1042.0 Safari/535.21")
.data(allFields)
.method(Method.POST).
execute();

String sessionId = res.cookie("<COOKIENAME>");

System.out.println(sessionId);

Document doc2  = Jsoup.connect("URL3")
.cookie("ASP.NET_SessionId", sessionId)
.timeout(0)
.get();

System.out.println(doc2.html());

}
}

[/sourcecode]

HTML output in Oracle PL SQL

Requirement : Generating HTML output using Oracle PL SQL

Solution :
Thomas Kyte shared below piece of code on his blog for generating html output from refcursor.

[sourcecode language="sql"]

CREATE OR REPLACE FUNCTION apps.fncrefcursor2html (rf sys_refcursor)
RETURN CLOB
IS
lretval       CLOB;
lhtmloutput   XMLTYPE;
lxsl          CLOB;
lxmldata      XMLTYPE;
lcontext      DBMS_XMLGEN.ctxhandle;
BEGIN
-- get a handle on the ref cursor --
lcontext := DBMS_XMLGEN.newcontext (rf);
-- setNullHandling to 1 (or 2) to allow null columns to be displayed --
DBMS_XMLGEN.setnullhandling (lcontext, 1);
-- create XML from ref cursor --
lxmldata := DBMS_XMLGEN.getxmltype (lcontext, DBMS_XMLGEN.NONE);

IF lxmldata IS NOT NULL
THEN
-- this is a generic XSL for Oracle's default XML row and rowset tags --
-- " " is a non-breaking space --
lxsl := lxsl || q'[<?xml version="1.0" encoding="ISO-8859-1"?>]';
lxsl :=
lxsl
|| q'[<xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform">]';
lxsl := lxsl || q'[ <xsl:output method="html"/>]';
lxsl := lxsl || q'[ <xsl:template match="/">]';
lxsl := lxsl || q'[   <table >]';
lxsl := lxsl || q'[     <tr >]';
lxsl := lxsl || q'[      <xsl:for-each select="/ROWSET/ROW[1]/*">]';
lxsl := lxsl || q'[       <th><xsl:value-of select="name()"/></th>]';
lxsl := lxsl || q'[      </xsl:for-each>]';
lxsl := lxsl || q'[     </tr>]';
lxsl := lxsl || q'[     <xsl:for-each select="/ROWSET/*">]';
lxsl := lxsl || q'[      <tr>]';
lxsl := lxsl || q'[       <xsl:for-each select="./*">]';
lxsl := lxsl || q'[        <td><xsl:value-of select="text()"/> </td>]';
lxsl := lxsl || q'[       </xsl:for-each>]';
lxsl := lxsl || q'[      </tr>]';
lxsl := lxsl || q'[     </xsl:for-each>]';
lxsl := lxsl || q'[   </table>]';
lxsl := lxsl || q'[ </xsl:template>]';
lxsl := lxsl || q'[</xsl:stylesheet>]';
-- XSL transformation to convert XML to HTML --
lhtmloutput := lxmldata.transform (XMLTYPE (lxsl));
-- convert XMLType to Clob --
lretval := lhtmloutput.getclobval ();
RETURN lretval;
ELSE
RETURN NULL;
END IF;
END fncrefcursor2html;
[/sourcecode]

Simple package using above function to write table output to Oracle Apps output file.

[sourcecode language="sql"]
CREATE OR REPLACE PACKAGE  apps.write_html_pkg
AS
PROCEDURE write_clob (l_clob_data CLOB);
PROCEDURE write_html_prc (o_err_msg OUT VARCHAR2, o_err_code OUT NUMBER);

END;

CREATE OR REPLACE PACKAGE BODY apps.write_html_pkg
AS
PROCEDURE write_clob (l_clob_data CLOB)
AS
l_char_data      VARCHAR2 (32767);
v_clob_len       NUMBER;
v_chunk_length   NUMBER;
v_iterations     NUMBER;
v_clob_temp      CLOB;
BEGIN
IF l_clob_data IS NOT NULL
THEN
v_chunk_length := 32767;
v_clob_len := DBMS_LOB.getlength (l_clob_data);
v_iterations := CEIL (v_clob_len / v_chunk_length);

-- v_chunk_length is 32767 and v_clob_length is length of the XML data stored inthe clob variable
FOR i IN 0 .. v_iterations
LOOP
v_clob_temp :=
DBMS_LOB.SUBSTR (l_clob_data,
v_chunk_length,
(i * v_chunk_length) + 1
);
fnd_file.put_line (fnd_file.output, v_clob_temp);
END LOOP;
ELSE
NULL;
END IF;
END;

PROCEDURE OUT (p_text VARCHAR2)
AS
BEGIN
fnd_file.put_line (fnd_file.output, p_text);
END;

PROCEDURE write_html_prc (o_err_msg OUT VARCHAR2, o_err_code OUT NUMBER)
IS
l_cursor      sys_refcursor;
l_clob_data   CLOB;
l_style       VARCHAR2 (32767);
BEGIN</pre>
l_style :=
         q'[<style type="text/css">
         table
{
border-collapse:collapse;
}

table td, table th {
    border:1px solid grey;
    border:1px solid #98bf21;
    padding:3px 7px 2px 7px;
}

table th
{

text-align:left;
padding-top:5px;
padding-bottom:4px;
background-color:#A7C942;
color:#fff;
}

  </style>]';
      OUT (q'[ <html>]');
      OUT (q'[ <head>]');
      OUT (l_style);
      OUT (q'[ </head>]');
      OUT (q'[  <body>]');
<pre>

--------------------------------write table-----------------------------
OPEN l_cursor FOR
SELECT *
FROM scott.emp;

l_clob_data := fncrefcursor2html (l_cursor);
write_clob (l_clob_data);
--------------------------------END :write table-----------------------------
OUT (q'[  </body>]');
OUT (q'[ </html>]');
END;
END write_html_pkg;
/
[/sourcecode]

Output Screenshot
Output