Java example: ReplaceCharFilter
The example in this section demonstrates creating a UDFilter that replaces every occurrence of one character in the input stream with another character in the output stream. This example is highly simplified and assumes the input stream is ASCII data.
Always remember that the input and output streams in a UDFilter are actually binary data. If you are performing character transformations using a UDFilter, convert the data stream from a string of bytes into a properly encoded string. For example, your input stream might consist of UTF-8 encoded text. If so, be sure to decode the raw bytes read from the buffer into a UTF-8 string before manipulating it.
Loading and using the example
The example UDFilter has two required parameters. The from_char parameter specifies the character to be replaced, and the to_char parameter specifies the replacement character. Load and use the ReplaceCharFilter UDFilter as follows:
=> CREATE LIBRARY JavaLib AS '/home/dbadmin/JavaUDlLib.jar'
->LANGUAGE 'JAVA';
CREATE LIBRARY
=> CREATE FILTER ReplaceCharFilter as LANGUAGE 'JAVA'
->name 'com.vertica.JavaLibs.ReplaceCharFilterFactory' library JavaLib;
CREATE FILTER FUNCTION
=> CREATE TABLE t (text VARCHAR);
CREATE TABLE
=> COPY t FROM STDIN WITH FILTER ReplaceCharFilter(from_char='a', to_char='z');
Enter data to be copied followed by a newline.
End with a backslash and a period on a line by itself.
>> Mary had a little lamb
>> a man, a plan, a canal, Panama
>> \.
=> SELECT * FROM t;
text
--------------------------------
Mzry hzd z little lzmb
z mzn, z plzn, z cznzl, Pznzmz
(2 rows)
=> --Calling the filter with incorrect parameters returns errors
=> COPY t from stdin with filter ReplaceCharFilter();
ERROR 3399: Failure in UDx RPC call InvokePlanUDL(): Error in User Defined Object [
ReplaceCharFilter], error code: 0
com.vertica.sdk.UdfException: You must supply two parameters to ReplaceChar: 'from_char' and 'to_char'
at com.vertica.JavaLibs.ReplaceCharFilterFactory.plan(ReplaceCharFilterFactory.java:22)
at com.vertica.udxfence.UDxExecContext.planUDFilter(UDxExecContext.java:889)
at com.vertica.udxfence.UDxExecContext.planCurrentUDLType(UDxExecContext.java:865)
at com.vertica.udxfence.UDxExecContext.planUDL(UDxExecContext.java:821)
at com.vertica.udxfence.UDxExecContext.run(UDxExecContext.java:242)
at java.lang.Thread.run(Thread.java:662)
Filter implementation
The ReplaceCharFilter class reads the data stream, replacing each occurrence of a user-specified character with another character.
package com.vertica.JavaLibs;
import com.vertica.sdk.DataBuffer;
import com.vertica.sdk.ServerInterface;
import com.vertica.sdk.State.InputState;
import com.vertica.sdk.State.StreamState;
import com.vertica.sdk.UDFilter;
/**
 * Example UDFilter that replaces every occurrence of one byte in the input
 * stream with another byte in the output stream.
 *
 * <p>The filter operates on raw bytes and compares only the first byte of the
 * configured characters, so it does not handle multi-byte characters such as
 * those found in UTF-8 encoded text.
 */
public class ReplaceCharFilter extends UDFilter {
    private final byte[] fromChar;
    private final byte[] toChar;

    /**
     * Creates a filter that replaces {@code fromChar} with {@code toChar}.
     *
     * @param fromChar the character to replace; only its first byte is used
     * @param toChar the replacement character; only its first byte is used
     */
    public ReplaceCharFilter(String fromChar, String toChar){
        // Stores the from char and to char as byte arrays using the platform
        // default charset. This is not a robust method of doing this, but
        // works for this simple example.
        this.fromChar = fromChar.getBytes();
        this.toChar = toChar.getBytes();
    }

    /**
     * Copies bytes from {@code input} to {@code output}, substituting the
     * replacement byte for every occurrence of the byte being replaced.
     *
     * <p>Reading starts at {@code input.offset} and writing starts at
     * {@code output.offset}; both offsets are advanced by the number of bytes
     * actually processed in this call.
     *
     * @param srvInterface not used by this filter
     * @param input buffer holding the bytes to filter
     * @param input_state whether more input will follow the current buffer
     * @param output buffer that receives the filtered bytes
     * @return {@code DONE} when the input is at end of file and has no unread
     *         bytes; otherwise {@code INPUT_NEEDED} when the input has fewer
     *         bytes left than the output has free space, or
     *         {@code OUTPUT_NEEDED} in every other case
     */
    @Override
    public StreamState process(ServerInterface srvInterface, DataBuffer input,
            InputState input_state, DataBuffer output) {
        int inputRemaining = input.buf.length - input.offset;

        // Check if there is no more input and the input buffer has been
        // completely processed. If so, filtering is done. Testing the unread
        // byte count (rather than the buffer length) also covers a non-empty
        // buffer that has already been fully consumed.
        if (input_state == InputState.END_OF_FILE && inputRemaining <= 0) {
            return StreamState.DONE;
        }

        // Determine how many bytes to process. This is either until the input
        // buffer is exhausted or the output buffer is filled.
        int outputRemaining = output.buf.length - output.offset;
        int count = Math.min(inputRemaining, outputRemaining);

        byte from = fromChar[0];
        byte to = toChar[0];

        // Index relative to each buffer's own offset so that a partially
        // consumed input buffer or a partially filled output buffer is
        // handled correctly.
        for (int i = 0; i < count; i++) {
            byte current = input.buf[input.offset + i];
            // This example just replaces each instance of from_char
            // with to_char. It does not consider things such as multi-byte
            // UTF-8 characters.
            output.buf[output.offset + i] = (current == from) ? to : current;
        }

        // Both buffers advance by exactly the number of bytes copied.
        input.offset += count;
        output.offset += count;

        if (input.buf.length - input.offset < output.buf.length - output.offset) {
            return StreamState.INPUT_NEEDED;
        } else {
            return StreamState.OUTPUT_NEEDED;
        }
    }
}
Factory implementation
ReplaceCharFilterFactory requires two parameters (from_char and to_char). The plan() method verifies that these parameters exist and are single-character strings. The method then stores them in the plan context. The prepare() method gets the parameter values and passes them to the ReplaceCharFilter objects, which it instantiates, to perform the filtering.
package com.vertica.JavaLibs;
import java.util.ArrayList;
import java.util.Vector;
import com.vertica.sdk.FilterFactory;
import com.vertica.sdk.PlanContext;
import com.vertica.sdk.ServerInterface;
import com.vertica.sdk.SizedColumnTypes;
import com.vertica.sdk.UDFilter;
import com.vertica.sdk.UdfException;
/**
 * Factory for {@link ReplaceCharFilter}. Validates the {@code from_char} and
 * {@code to_char} parameters, hands them to the filter through the plan
 * context, and declares the parameter types the filter accepts.
 */
public class ReplaceCharFilterFactory extends FilterFactory {

    /**
     * Runs on the initiator node to perform verification and basic setup.
     * Checks that both parameters were supplied and that each is exactly one
     * character long, then saves them in the plan context for prepare().
     *
     * @throws UdfException if a parameter is missing or is not a single
     *         character
     */
    @Override
    public void plan(ServerInterface srvInterface, PlanContext planCtxt)
            throws UdfException {
        ArrayList<String> supplied =
                srvInterface.getParamReader().getParamNames();

        // Both parameters are mandatory.
        if (!supplied.contains("from_char") || !supplied.contains("to_char")) {
            throw new UdfException(0, "You must supply two parameters" +
                    " to ReplaceChar: 'from_char' and 'to_char'");
        }

        // Validate the character to be replaced and save it in the plan
        // context, to be read by the prepare() method.
        String source = readSingleChar(srvInterface, "from_char");
        planCtxt.getWriter().setString("fromChar", source);

        // Validate the replacement character and save it in the plan data.
        String replacement = readSingleChar(srvInterface, "to_char");
        planCtxt.getWriter().setString("toChar", replacement);
    }

    /**
     * Called on every host that will filter data. Instantiates the UDFilter
     * subclass using the values stored in the context by plan().
     */
    @Override
    public UDFilter prepare(ServerInterface srvInterface, PlanContext planCtxt)
            throws UdfException {
        return new ReplaceCharFilter(
                planCtxt.getWriter().getString("fromChar"),
                planCtxt.getWriter().getString("toChar"));
    }

    /**
     * Describes the parameters accepted by this filter: two VARCHAR(1)
     * values named from_char and to_char.
     */
    @Override
    public void getParameterType(ServerInterface srvInterface,
            SizedColumnTypes parameterTypes) {
        parameterTypes.addVarchar(1, "from_char");
        parameterTypes.addVarchar(1, "to_char");
    }

    /**
     * Reads the named string parameter and ensures it is a single character.
     *
     * @param srvInterface source of the user-supplied parameters
     * @param paramName name of the parameter to read
     * @return the parameter value
     * @throws UdfException if the value's length is not exactly 1
     */
    private static String readSingleChar(ServerInterface srvInterface,
            String paramName) throws UdfException {
        String value = srvInterface.getParamReader().getString(paramName);
        if (value.length() != 1) {
            throw new UdfException(0, String.format(
                    "Replacechar expects a single character in the '%s' "
                    + "parameter. Got length %d",
                    paramName, value.length()));
        }
        return value;
    }
}