ETL scenario language
ETL scenarios must be written in special XML-based language.
Language
- ETL scenario must be a well formed XML. Check out official XML specification
- For special characters which are not allowed in the well formed XML use correct XML substitutions
- All node and attribute names are case sensitive
- Values generally are not case sensitive except class names
- Class names must be fully qualified. For example com.toolsverse.etl.driver.oracle.OracleDriver
- Order of nodes in the node list matters. For examples if there are multiple <task> nodes under <tasks> they will be executed in order
- For comments use
<!-- text -->
Language Artifacts
- <scenario> - root node in XML. Used to define global attributes such as scenario name, description, script name, etc. Includes other nodes. Variables under scenario used to define global input parameters which can be used to parameterize scenario.
- <execute> - root node for inner scenarios.
- <scenario> - inner scenario under <execute>. Inner scenario is a sort of sub program executed as a part of owner's scenario.
- <sources> - root node for all sources.
-
Invalid language.
You need to specify a language like this: <source lang="html4strict">...</source>
Supported languages for syntax highlighting:
4cs, 6502acme, 6502kickass, 6502tasm, 68000devpac, abap, actionscript, actionscript3, ada, algol68, apache, applescript, apt_sources, arm, asm, asp, asymptote, autoconf, autohotkey, autoit, avisynth, awk, bascomavr, bash, basic4gl, bf, bibtex, blitzbasic, bnf, boo, c, c_loadrunner, c_mac, caddcl, cadlisp, cfdg, cfm, chaiscript, cil, clojure, cmake, cobol, coffeescript, cpp, cpp-qt, csharp, css, cuesheet, d, dcl, dcpu16, dcs, delphi, diff, div, dos, dot, e, ecmascript, eiffel, email, epc, erlang, euphoria, f1, falcon, fo, fortran, freebasic, freeswitch, fsharp, gambas, gdb, genero, genie, gettext, glsl, gml, gnuplot, go, groovy, gwbasic, haskell, haxe, hicest, hq9plus, html4strict, html5, icon, idl, ini, inno, intercal, io, j, java, java5, javascript, jquery, kixtart, klonec, klonecpp, latex, lb, ldif, lisp, llvm, locobasic, logtalk, lolcode, lotusformulas, lotusscript, lscript, lsl2, lua, m68k, magiksf, make, mapbasic, matlab, mirc, mmix, modula2, modula3, mpasm, mxml, mysql, nagios, netrexx, newlisp, nsis, oberon2, objc, objeck, ocaml, ocaml-brief, octave, oobas, oorexx, oracle11, oracle8, oxygene, oz, parasail, parigp, pascal, pcre, per, perl, perl6, pf, php, php-brief, pic16, pike, pixelbender, pli, plsql, postgresql, povray, powerbuilder, powershell, proftpd, progress, prolog, properties, providex, purebasic, pycon, pys60, python, q, qbasic, rails, rebol, reg, rexx, robots, rpmspec, rsplus, ruby, sas, scala, scheme, scilab, sdlbasic, smalltalk, smarty, spark, sparql, sql, stonescript, systemverilog, tcl, teraterm, text, thinbasic, tsql, typoscript, unicon, upc, urbi, uscript, vala, vb, vbnet, vedit, verilog, vhdl, vim, visualfoxpro, visualprolog, whitespace, whois, winbatch, xbasic, xml, xorg_conf, xpp, yaml, z80, zxbasic
- "extract from" object. Can include transformations and tasks which performed on the "extract from" object. Variables under source used to set dataset variables.
<extract keyname="key name" keyfield="key fields">
<writer class="writer_class_name" attributes />
<writer>writer_class_name</writer>
<reader class="writer_class_name" attributes />
<reader>reader_class_name</reader>
<driver>auto|driver_class_name</driver>
<driver name="auto|driver_class_name" parent="parent_driver_class_name" literalsize="int" varcharsize="int" charsize="int" precision="int" scale="int" lineslimit="int" initsql="sql" case="empty|lower|upper"/>
<connection>source connection name</connection>
<sql>sql</sql>
<using>using</using>
<statement>empty|prepared|callable</statement>
<indexes>coma_delimited_list_of_output_
param_indexes</indexes>
<onexception action="continue|ignore|raise|ignoreparseerror" mask="mask" />
<variables>
<variable_name name="variable name" value="value" attributes />
</variables>
</extract>
<tasks>
<task noconnection="false|true" commit="false|true" scope="after|pre|inline|before_etl">
<name>task name</name>
<tablename>table name</tablename>
<class>task_class_name</class>
<connection>task connection name</connection>
<driver>auto|driver_class_name</driver>
<driver name="auto|driver_class_name" parent="parent_driver_class_name" literalsize="int" varcharsize="int" charsize="int" precision="int" scale="int" lineslimit="int" initsql="sql" case="empty|lower|upper"/>
<sql|code|cmd>code</sql|code|cmd>
<using>using</using>
<onexception action="continue|ignore|raise" mask="mask" />
<variables>
<variable_name name="variable name" value="value" attributes />
</variables>
</task>
</tasks>
</source>
</sources>
<destinations>
<destination encode="false|true" empty="false|true" tolerate="false|true" noconnection="false|true" type="regular|procedure|function|wait|table" enabled="true|false" parallel="false|true" scope="global|single" condition="code" conditionlang="SQL|JavaScript|Others" condition_connection="connection name"> source="false|true"
<name>destination name</name>
<source>linked source name</source>
<objectname|tablename>object or table name</objectname|tablename>
<cursor table="table name" sql="sql" type="regular|temp|temporary" onfinish="keep|drop" />
<metadata>true|false</metadata>
<metadata indexes="false|true" suffix="suffix"/>
<load stream="false|true" key="keys" action="insert|update|delete|merge|conditional" condition="code" bind="false|true" window="int">
<connection>destination connection name</connection>
<driver>auto|driver_class_name</driver>
<driver name="auto|driver_class_name" parent="parent_driver_class_name" literalsize="int" varcharsize="int" charsize="int" precision="int" scale="int" lineslimit="int" initsql="sql" case="empty|lower|upper"/>
<condition>sql</condition>
<then>sql</then>
<else>sql</else>
<after>sql</after>
<sql>sql</sql>
<onexception action="continue|ignore|merge|raise" mask="mask" key="keys" savepoint="false|true" />
<reader class="writer_class_name" attributes />
<reader>reader_class_name</reader>
<writer class="writer_class_name" attributes />
<writer>writer_class_name</writer>
<variables>
<variable_name name="variable name" tablename="table name" function="function" class="function_class_name" field="field" value="value" sql|code="sql|code" lang="SQL|JavaScript|Others" type="sql" linked="linked var name" label="label" param="params" scope="before|after|runtime" destination="linked destination name" tolerate="false|true" include="true|false" exclude="false|true" add="false|true" global="false|true" declare="sql" sqltype="sqltype" nativetype="nativetype" attributes />
</variables>
<cache>cache_class_name</cache>
</load>
<tasks>
<task noconnection="false|true" commit="false|true" scope="after|pre|inline|before_etl" >
<name>task name</name>
<tablename>table name</tablename>
<class>task_class_name</class>
<connection>task connection name</connection>
<driver>auto|driver_class_name</driver>
<driver name="auto|driver_class_name" parent="parent_driver_class_name" literalsize="int" varcharsize="int" charsize="int" precision="int" scale="int" lineslimit="int" initsql="sql" case="empty|lower|upper"/>
<sql|code|cmd>code</sql|code|cmd>
<using>using</using>
<onexception action="continue|ignore|raise" mask="mask" />
<variables>
<variable_name name="variable name" value="value" attributes />
</variables>
</task>
</tasks>
</destination>
</destinations>
<aftertasks>
<task noconnection="false|true" commit="false|true">
<name>task name</name>
<tablename>table name</tablename>
<class>task_class_name</class>
<connection>task connection name</connection>
<driver>auto|driver_class_name</driver>
<driver name="auto|driver_class_name" parent="parent_driver_class_name" literalsize="int" varcharsize="int" charsize="int" precision="int" scale="int" lineslimit="int" initsql="sql" case="empty|lower|upper"/>
<sql|code|cmd>code</sql|code|cmd>
<using>using</using>
<onexception action="continue|ignore|raise" mask="mask" />
<variables>
<variable_name name="variable name" value="value" attributes />
</variables>
</task>
</aftertasks>
</scenario>