C++ example: CurlSource
The CurlSource example allows you to use cURL to open and read in a file over HTTP. The example provided is part of: /opt/vertica/sdk/examples/SourceFunctions/cURL.cpp
.
Source implementation
This example uses the helper library available in /opt/vertica/sdk/examples/HelperLibraries/
.
CurlSource loads the data in chunks. If the parser encounters an EndOfFile marker, then the process()
method returns DONE. Otherwise, the method returns OUTPUT_NEEDED and processes another chunk of data. The functions included in the helper library (such as url_fread()
and url_fopen()
) are based on examples that come with the libcurl library. For an example, see http://curl.haxx.se/libcurl/c/fopen.html.
The setup()
function opens a file handle and the destroy()
function closes it. Both use functions from the helper library.
class CurlSource : public UDSource {private:
URL_FILE *handle;
std::string url;
virtual StreamState process(ServerInterface &srvInterface, DataBuffer &output) {
output.offset = url_fread(output.buf, 1, output.size, handle);
return url_feof(handle) ? DONE : OUTPUT_NEEDED;
}
public:
CurlSource(std::string url) : url(url) {}
void setup(ServerInterface &srvInterface) {
handle = url_fopen(url.c_str(),"r");
}
void destroy(ServerInterface &srvInterface) {
url_fclose(handle);
}
};
Factory implementation
CurlSourceFactory
produces CurlSource
instances.
class CurlSourceFactory : public SourceFactory {public:
virtual void plan(ServerInterface &srvInterface,
NodeSpecifyingPlanContext &planCtxt) {
std::vector<std::string> args = srvInterface.getParamReader().getParamNames();
/* Check parameters */
if (args.size() != 1 || find(args.begin(), args.end(), "url") == args.end()) {
vt_report_error(0, "You must provide a single URL.");
}
/* Populate planData */
planCtxt.getWriter().getStringRef("url").copy(
srvInterface.getParamReader().getStringRef("url"));
/* Assign Nodes */
std::vector<std::string> executionNodes = planCtxt.getClusterNodes();
while (executionNodes.size() > 1) executionNodes.pop_back();
// Only run on the first node in the list.
planCtxt.setTargetNodes(executionNodes);
}
virtual std::vector<UDSource*> prepareUDSources(ServerInterface &srvInterface,
NodeSpecifyingPlanContext &planCtxt) {
std::vector<UDSource*> retVal;
retVal.push_back(vt_createFuncObj(srvInterface.allocator, CurlSource,
planCtxt.getReader().getStringRef("url").str()));
return retVal;
}
virtual void getParameterType(ServerInterface &srvInterface,
SizedColumnTypes ¶meterTypes) {
parameterTypes.addVarchar(65000, "url");
}
};
RegisterFactory(CurlSourceFactory);