public final class CommonEtlUtils
extends Object
Modifier and Type | Field and Description |
---|---|
static String |
MODE_PROP
MODE property.
|
static String |
PARENT_ROW_ID_FIELD_NAME
The Constant PARENT_ROW_ID_FIELD_NAME.
|
static String |
PASSIVE_MODE
The PASSIVE MODE.
|
static String |
PROXY_HOST_PROP
PROXY_HOST property.
|
static String |
PROXY_PORT_PROP
PROXY_PORT property.
|
static String |
ROW_ID_FIELD_NAME
The Constant ROW_ID_FIELD_NAME.
|
Constructor and Description |
---|
CommonEtlUtils() |
Modifier and Type | Method and Description |
---|---|
static DataSet |
addDimensions(DataSet drivingDataSet,
String keys,
String addColumns,
boolean keepAllFields,
DataSet... dataSetsToJoin)
Add dimensions to the data set.
|
static DataSetFields |
cloneFields(DataSetFields fields)
Clone fields.
|
static DataSet |
denormalize(DataSet source,
boolean cleanUp)
Pivot data set which has multiple versions of the same field.
|
static DataSet |
executeSql(DataSet dataSet,
String sql)
Execute sql on any data set.
|
static DataSet |
executeSql(DataSet dataSet,
String sql,
TypedKeyValue<String,Object>... args)
Execute sql on any data set.
|
static DataSet |
executeSql(String sql,
DataSet... dataSets)
Execute sql on arrays of data sets.
|
static DataSet |
executeSql(String sql,
TypedKeyValue<String,Object>[] args,
DataSet... dataSets)
Execute sql on arrays of data sets.
|
static DataSet |
extractDimension(String keys,
DataSet drivingDataSet,
String name)
Extract dimension from multidimensional data set.
|
static DataSet |
filter(DataSet dataSet,
String conditions)
Filter data set and keep the source data set intact.
|
static DataSet |
filter(DataSet dataSet,
String conditions,
boolean keepOriginal)
Filter data set.
|
static Map<String,String> |
getCaseInsensitiveKeys(String key)
Gets the case insensitive keys.
|
static Map<String,List<FieldMapping>> |
getFieldMappingPerDataSet(List<FieldMapping> mapping)
Gets the field mapping per data set.
|
static String |
getFieldName(List<FieldMapping> mapping,
int index,
boolean isSource)
Gets the fully qualified field name.
|
static DataSetFields |
getFieldsAfterIncludeExclude(DataSet dataSet,
Set<String> includeFields,
Set<String> excludeFields)
Gets the map of fields after include and exclude.
|
static String |
getFieldsAsString(DataSetFields fields)
Gets the fields as a comma delimited string.
|
static Map<String,FieldDef> |
getFieldsExceptExcluded(String excludedFields,
Map<String,FieldDef> dataSetFields)
Returns the map of the fields except given excluded fields.
|
static Object |
getFieldValue(DataSet dataSet,
String sql,
String fieldName)
Execute sql on any data set and return value of the given field.
|
static Object |
getFieldValue(DataSet dataSet,
String sql,
String fieldName,
TypedKeyValue<String,Object>... args)
Execute sql on any data set and return value of the given field.
|
static TypedKeyValue<String,String> |
getFrom(String sql)
Gets the from and modified SQL statement without from.
|
static String |
getKey(DataSet dataSet,
DataSetRecord record,
Map<String,FieldDef> keys,
boolean ignoreCase,
boolean doTrim)
Gets the string representation of the key for the given record and map of
key fields.
|
static Map<String,FieldDef> |
getKeyFields(String keys,
Map<String,FieldDef> dataSetFields)
Returns the map of the key fields for the given keys.
|
static DataSetRecord |
getRecordAfterInlcudeExclude(DataSet dataSet,
DataSetRecord record,
int cols,
boolean isSelective,
Set<String> includeFields,
Set<String> excludeFields)
Gets the record after include and exclude.
|
static DataSet |
getSelectedDataSet(DataSet dataSet,
TypedKeyValue<int[],int[]> selected)
Gets the selected data set.
|
static DataSet |
intersect(DataSet drivingDataSet,
DataSet secondDataSet,
String keys)
Performs intersect operations on two data sets.
|
static DataSet |
join(DataSet drivingDataSet,
String keys,
String include,
String exclude,
TypedKeyValue<DataSet,Boolean>... dataSetsToJoin)
Joins data sets.
|
static DataSet |
keyValueDenormalization(DataSet source,
DataSet newDataSet,
String groupByColumns,
String includeColumns,
String keyColumn,
String valueColumn,
boolean ignoreCase,
boolean doTrim,
String fieldsToHave)
This transformation groups rows by combination of 'group by' fields.
|
static DataSet |
keyValueDenormalization(DataSet source,
DataSet newDataSet,
String groupByColumns,
String includeColumns,
String keyColumn,
String valueColumn,
boolean ignoreCase,
boolean doTrim,
String fieldsToHave,
String combine)
This transformation groups rows by combination of 'group by' fields.
|
static DataSet |
keyValueNormalization(DataSet source,
String normalizeColumns,
String keyColumn,
String valueColumn,
boolean ignoreEmpty)
This transformation performs key-value normalization.
|
static DataSet |
keyValueNormalization(DataSet source,
String normalizeColumns,
String keyColumn,
String valueColumn,
boolean ignoreEmpty,
String doNotNormalizeColumns)
This transformation performs key-value normalization.
|
static Object |
lookup(DataSet dataSet,
String conditions,
String fieldName)
Find the field value using given filter conditions and field name.
|
static DataSet |
matrix(DataSet drivingDataSet,
int maxColsInRow)
This transformation leaves
|
static DataSetData |
mergeData(DataSetFields fields,
DataSetData to,
DataSetRecord driving,
DataSet... from)
Merge and normalize data.
|
static DataSetFields |
mergeFields(DataSetFields... allFields)
Merge fields from different data sets.
|
static DataSet |
minus(DataSet drivingDataSet,
DataSet dataSetToMinus,
String keys)
Performs minus operations on two data sets.
|
static DataSetData |
normalizeDataSetData(DataSetFields fields,
DataSetData data,
DataSetRecord driving,
DataSet dataSet)
Normalize data set data using given common list of fields, update given
data.
|
static Map<String,DataSet> |
normalizeNestedDataSet(DataSet source,
Set<String> toExclude)
Normalize nested data set.
|
static Map<String,DataSet> |
normalizeNestedDataSet(String parentRowId,
String name,
DataSet source,
Map<String,DataSet> existingDimensions,
Set<String> toExclude)
Normalize nested data set.
|
static DataSet |
pivot(DataSet dataSet,
String theKeys,
String theFields,
String theInclude,
String theExclude,
boolean denorm,
boolean ignoreCase,
boolean doTrim,
int maxFields,
String leading)
This method performs pivoting operations on data set, such as grouping,
de-normalization, etc.
|
static DataSet |
pivot(DataSet dataSet,
String theKeys,
String theFields,
String theInclude,
String theExclude,
boolean denorm,
boolean ignoreCase,
boolean doTrim,
int maxFields,
String leading,
DataSet drivingDataSet)
This method performs pivoting operations on data set, such as grouping,
de-normalization, etc.
|
static void |
processFiles(Alias alias,
IoProcessorCallable callable,
boolean disconnect,
boolean closeIfNotDone,
boolean doneOnDone,
String ownerName,
Driver driver)
Process files for the given alias.
|
static void |
processFiles(Alias alias,
IoProcessorCallable callable,
boolean disconnect,
Driver driver)
Process files for the given alias.
|
static List<TypedKeyValue<Integer,FieldDef>> |
reorderFields(DataSetFields fields,
String pattern)
Reorder fields according to the pattern.
|
static DataSetRecord |
reorderFieldsInRecord(DataSetRecord record,
DataSetFields fields,
DataSetFields originalFields)
Reorder fields in record.
|
static DataSetRecord |
reorderFieldsInRecord(DataSetRecord newRecord,
DataSetRecord record,
DataSetFields fields,
Object keyFieldValue,
Object valueFieldValue,
String[] actualFields,
String combine)
Reorder fields in record.
|
static LinkedHashMap<String,DataSet> |
split(DataSet dataSet,
String keys)
Splits the data set on multiple data sets using given key field(s).
|
static LinkedHashMap<String,DataSet> |
split(DataSet dataSet,
String keys,
int dsSize)
Splits the data set on multiple data sets using given key field(s).
|
static List<DataSetRecord> |
splitRecord(DataSetRecord record,
int maxColsInRow,
DataSetFields fields)
Split record.
|
static DataSet |
transform(DataSet source,
List<FieldMapping> mapping)
Transform source data set into destination using given mapping.
|
static DataSet |
union(DataSet dataSet1,
DataSet dataSet2,
String keys,
boolean unionAll,
String include,
String exclude)
Performs union of the two data sets.
|
public static final String MODE_PROP
public static final String PROXY_HOST_PROP
public static final String PROXY_PORT_PROP
public static final String PASSIVE_MODE
public static final String ROW_ID_FIELD_NAME
public static final String PARENT_ROW_ID_FIELD_NAME
public static DataSet addDimensions(DataSet drivingDataSet, String keys, String addColumns, boolean keepAllFields, DataSet... dataSetsToJoin) throws Exception
drivingDataSet
- the driving data setkeys
- the key fieldsaddColumns
- the columns to add (otherwise use data set name)keepAllFields
- if true keep all columns from data set to join, otherwise only
columns which are not included in the keydataSetsToJoin
- the data sets to joinException
- in case of any errorpublic static DataSet denormalize(DataSet source, boolean cleanUp)
source
- the source data setcleanUp
- if true
the source data set will be clearedpublic static DataSet extractDimension(String keys, DataSet drivingDataSet, String name) throws Exception
keys
- the key columnsdrivingDataSet
- the driving data setname
- the nameException
- in case of any errorpublic static DataSetFields getFieldsAfterIncludeExclude(DataSet dataSet, Set<String> includeFields, Set<String> excludeFields)
dataSet
- the data setincludeFields
- the include fieldsexcludeFields
- the exclude fieldspublic static String getFieldsAsString(DataSetFields fields)
fields
- the fieldspublic static String getKey(DataSet dataSet, DataSetRecord record, Map<String,FieldDef> keys, boolean ignoreCase, boolean doTrim)
dataSet
- the data setrecord
- the recordkeys
- the key fieldsignoreCase
- if true
ignore char casedoTrim
- if true
truncate stringpublic static Map<String,String> getCaseInsensitiveKeys(String key)
key
- the keypublic static Map<String,FieldDef> getKeyFields(String keys, Map<String,FieldDef> dataSetFields)
keys
- the keysdataSetFields
- the fieldspublic static Map<String,FieldDef> getFieldsExceptExcluded(String excludedFields, Map<String,FieldDef> dataSetFields)
excludedFields
- the excluded fieldsdataSetFields
- the fieldspublic static DataSetRecord getRecordAfterInlcudeExclude(DataSet dataSet, DataSetRecord record, int cols, boolean isSelective, Set<String> includeFields, Set<String> excludeFields)
dataSet
- the data setrecord
- the source recordcols
- the number of columnsisSelective
- if true select only fields which are included or not excludedincludeFields
- the include fieldsexcludeFields
- the exclude fieldspublic static Map<String,DataSet> normalizeNestedDataSet(String parentRowId, String name, DataSet source, Map<String,DataSet> existingDimensions, Set<String> toExclude) throws Exception
parentRowId
- the parent row idname
- the namesource
- the source data setexistingDimensions
- the existing dimensionstoExclude
- the to excludeException
- the exceptionpublic static Map<String,DataSet> normalizeNestedDataSet(DataSet source, Set<String> toExclude) throws Exception
source
- the source data settoExclude
- the to excludeException
- the exceptionpublic static DataSet getSelectedDataSet(DataSet dataSet, TypedKeyValue<int[],int[]> selected)
dataSet
- the data setselected
- the selected rows and columnspublic static DataSet intersect(DataSet drivingDataSet, DataSet secondDataSet, String keys) throws Exception
drivingDataSet
- the driving data setsecondDataSet
- the second datasetkeys
- the key fieldsException
- in case of any errorpublic static DataSet pivot(DataSet dataSet, String theKeys, String theFields, String theInclude, String theExclude, boolean denorm, boolean ignoreCase, boolean doTrim, int maxFields, String leading) throws Exception
dataSet
- the data settheKeys
- the fields to "group by"theFields
- the calculated fieldstheInclude
- the fields to includetheExclude
- the fields to excludedenorm
- if true - denormalize data set at the endignoreCase
- if true - ignore case of the key fieldsdoTrim
- if true - trim key fieldsmaxFields
- the maximum number of fieldsleading
- the leading fieldsException
- in case of any errorpublic static DataSet pivot(DataSet dataSet, String theKeys, String theFields, String theInclude, String theExclude, boolean denorm, boolean ignoreCase, boolean doTrim, int maxFields, String leading, DataSet drivingDataSet) throws Exception
dataSet
- the data settheKeys
- the fields to "group by"theFields
- the calculated fieldstheInclude
- the fields to includetheExclude
- the fields to excludedenorm
- if true - denormalize data set at the endignoreCase
- if true - ignore case of the key fieldsdoTrim
- if true - trim key fieldsmaxFields
- the maximum number of fieldsleading
- the leading fieldsdrivingDataSet
- the driving data setException
- in case of any errorpublic static DataSet filter(DataSet dataSet, String conditions) throws Exception
dataSet
- the data setconditions
- the filter conditionsException
- in case of any errorpublic static Object lookup(DataSet dataSet, String conditions, String fieldName) throws Exception
dataSet
- the data setconditions
- the filter conditionsfieldName
- the field nameException
- in case of any errorpublic static DataSet filter(DataSet dataSet, String conditions, boolean keepOriginal) throws Exception
dataSet
- the data setconditions
- the filter conditionskeepOriginal
- if true keep original dataException
- in case of any errorpublic static TypedKeyValue<String,String> getFrom(String sql)
sql
- the sqlpublic static DataSet executeSql(DataSet dataSet, String sql) throws Exception
dataSet
- the data setsql
- the sql to executeException
- in case of any errorpublic static DataSet executeSql(String sql, DataSet... dataSets) throws Exception
sql
- the sql to executedataSets
- the array of data setsException
- in case of any errorpublic static DataSet executeSql(DataSet dataSet, String sql, TypedKeyValue<String,Object>... args) throws Exception
dataSet
- the data setsql
- the sql to executeargs
- the SQL parameters as key/value pairsException
- in case of any errorpublic static DataSetFields cloneFields(DataSetFields fields)
fields
- the fieldspublic static DataSet executeSql(String sql, TypedKeyValue<String,Object>[] args, DataSet... dataSets) throws Exception
sql
- the sql to executeargs
- the SQL parameters as key/value pairsdataSets
- the array of data setsException
- in case of any errorpublic static Object getFieldValue(DataSet dataSet, String sql, String fieldName, TypedKeyValue<String,Object>... args) throws Exception
dataSet
- the data setsql
- the sql to executefieldName
- the field nameargs
- the SQL parameters as key/value pairsException
- in case of any errorpublic static Object getFieldValue(DataSet dataSet, String sql, String fieldName) throws Exception
dataSet
- the data setsql
- the sql to executefieldName
- the field nameException
- in case of any error@SafeVarargs public static DataSet join(DataSet drivingDataSet, String keys, String include, String exclude, TypedKeyValue<DataSet,Boolean>... dataSetsToJoin) throws Exception
drivingDataSet
- the driving data setkeys
- the key fieldsinclude
- the fields to includeexclude
- the fields to excludedataSetsToJoin
- the data sets to joinException
- in case of any errorpublic static DataSet keyValueDenormalization(DataSet source, DataSet newDataSet, String groupByColumns, String includeColumns, String keyColumn, String valueColumn, boolean ignoreCase, boolean doTrim, String fieldsToHave) throws Exception
Before: id attribute value 1 first_name John 1 last_name Doe 2 email test@yahoo.com 2 ssn 123 After: id first_name last_name email ssn 1 John Doe 2 test@yahoo.com 123
source
- the source datasetnewDataSet
- the new datasetgroupByColumns
- the group by columnsincludeColumns
- include these columns in addition to groupByColumnskeyColumn
- the key columnvalueColumn
- the value columnignoreCase
- if true - ignore character case when comparing key columnsdoTrim
- if true - trim column values when comparing key columnsfieldsToHave
- the comma delimited list of fields to have in the dataset.
This parameter can be null.Exception
- in case of any errorpublic static DataSet keyValueDenormalization(DataSet source, DataSet newDataSet, String groupByColumns, String includeColumns, String keyColumn, String valueColumn, boolean ignoreCase, boolean doTrim, String fieldsToHave, String combine) throws Exception
Before: id attribute value 1 first_name John 1 last_name Doe 2 email test@yahoo.com 2 ssn 123 After: id first_name last_name email ssn 1 John Doe 2 test@yahoo.com 123
source
- the source datasetnewDataSet
- the new datasetgroupByColumns
- the group by columnsincludeColumns
- include these columns in addition to groupByColumnskeyColumn
- the key columnvalueColumn
- the value columnignoreCase
- if true - ignore character case when comparing key columnsdoTrim
- if true - trim column values when comparing key columnsfieldsToHave
- the comma delimited list of fields to have in the dataset.
This parameter can be null.combine
- the field to combine key-values into (must exist)Exception
- in case of any errorpublic static DataSet keyValueNormalization(DataSet source, String normalizeColumns, String keyColumn, String valueColumn, boolean ignoreEmpty) throws Exception
Before: id first_name last_name email ssn 1 John Doe 2 test@yahoo.com 123 After: id attribute value 1 first_name John 1 last_name Doe 2 email test@yahoo.com 2 ssn 123
source
- the sourcenormalizeColumns
- the columns to normalize (transform to key-value pairs where
key is a column name and value is columns value)keyColumn
- the name of the key columnvalueColumn
- the name of the value columnignoreEmpty
- if true empty columns with empty values will be ignoredException
- in case of any errorpublic static DataSet keyValueNormalization(DataSet source, String normalizeColumns, String keyColumn, String valueColumn, boolean ignoreEmpty, String doNotNormalizeColumns) throws Exception
Before: id first_name last_name email ssn 1 John Doe 2 test@yahoo.com 123 After: id attribute value 1 first_name John 1 last_name Doe 2 email test@yahoo.com 2 ssn 123
source
- the sourcenormalizeColumns
- the columns to normalize (transform to key-value pairs where
key is a column name and value is columns value)keyColumn
- the name of the key columnvalueColumn
- the name of the value columnignoreEmpty
- if true empty columns with empty values will be ignoreddoNotNormalizeColumns
- the columns to not normalizeException
- in case of any errorpublic static DataSet matrix(DataSet drivingDataSet, int maxColsInRow)
<= maxColsInRow columns in one row and moves the rest to the next row.
drivingDataSet
- the data set to transformmaxColsInRow
- the max cols in row@SafeVarargs public static DataSetFields mergeFields(DataSetFields... allFields)
allFields
- the all fieldspublic static DataSetData normalizeDataSetData(DataSetFields fields, DataSetData data, DataSetRecord driving, DataSet dataSet)
fields
- the fieldsdata
- the datadriving
- the driving recorddataSet
- the data set to normalizepublic static DataSetData mergeData(DataSetFields fields, DataSetData to, DataSetRecord driving, DataSet... from)
fields
- the fieldsto
- the data to merge intodriving
- the driving recordfrom
- the array of data sets to merge data frompublic static DataSet minus(DataSet drivingDataSet, DataSet dataSetToMinus, String keys) throws Exception
drivingDataSet
- the driving data setdataSetToMinus
- the data set to joinkeys
- the key fieldsException
- in case of any errorpublic static void processFiles(Alias alias, IoProcessorCallable callable, boolean disconnect, Driver driver) throws Exception
alias
- the aliascallable
- the instance of the class which implements callable interfacedisconnect
- if true disconnect on exitException
- in case of any errorpublic static void processFiles(Alias alias, IoProcessorCallable callable, boolean disconnect, boolean closeIfNotDone, boolean doneOnDone, String ownerName, Driver driver) throws Exception
alias
- the aliascallable
- the instance of the class which implements callable interfacedisconnect
- if true disconnect on exitcloseIfNotDone
- the close if not donedoneOnDone
- the done on doneownerName
- the owner namedriver
- the driverException
- in case of any errorpublic static List<TypedKeyValue<Integer,FieldDef>> reorderFields(DataSetFields fields, String pattern)
fields
- the fieldspattern
- the patternpublic static DataSetRecord reorderFieldsInRecord(DataSetRecord record, DataSetFields fields, DataSetFields originalFields)
record
- the original recordfields
- the fieldsoriginalFields
- the original fieldspublic static DataSetRecord reorderFieldsInRecord(DataSetRecord newRecord, DataSetRecord record, DataSetFields fields, Object keyFieldValue, Object valueFieldValue, String[] actualFields, String combine)
newRecord
- the new recordrecord
- the recordfields
- the fieldskeyFieldValue
- the key field valuevalueFieldValue
- the value field valueactualFields
- the actual fieldscombine
- the combine fieldpublic static LinkedHashMap<String,DataSet> split(DataSet dataSet, String keys)
dataSet
- the data setkeys
- the keyspublic static LinkedHashMap<String,DataSet> split(DataSet dataSet, String keys, int dsSize)
dataSet
- the data setkeys
- the keysdsSize
- the maximum data set sizepublic static List<DataSetRecord> splitRecord(DataSetRecord record, int maxColsInRow, DataSetFields fields)
record
- the record to splitmaxColsInRow
- the max number of columns in the rowfields
- the fieldspublic static DataSet union(DataSet dataSet1, DataSet dataSet2, String keys, boolean unionAll, String include, String exclude) throws Exception
dataSet1
- the first data setdataSet2
- the second data setkeys
- the key fields used when unionAll == falseunionAll
- if false exclude rows with duplicated keysinclude
- the fields to includeexclude
- the fields to excludeException
- in case of any errorpublic static String getFieldName(List<FieldMapping> mapping, int index, boolean isSource)
mapping
- the mappingindex
- the index of the fieldisSource
- the is sourcepublic static Map<String,List<FieldMapping>> getFieldMappingPerDataSet(List<FieldMapping> mapping)
mapping
- the original mappingpublic static DataSet transform(DataSet source, List<FieldMapping> mapping) throws Exception
source
- the source data setmapping
- the mappingException
- in case of any errorCopyright © 2010-2020 Toolsverse. All Rights Reserved.