diff --git a/reference/svm/book.xml b/reference/svm/book.xml new file mode 100644 index 0000000000..192cca3bd2 --- /dev/null +++ b/reference/svm/book.xml @@ -0,0 +1,42 @@ + + + + + Support Vector Machine + SVM + + + &reftitle.intro; + + LibSVM is an efficient solver for SVM classification and regression problems. The svm extension wraps this in a PHP interface for easy use in PHP scripts. + + + + &reference.svm.setup; + &reference.svm.examples; + + &reference.svm.svm; + &reference.svm.svmmodel; + + + + diff --git a/reference/svm/configure.xml b/reference/svm/configure.xml new file mode 100644 index 0000000000..9c83840b0a --- /dev/null +++ b/reference/svm/configure.xml @@ -0,0 +1,34 @@ + + + +
+ &reftitle.install; + + + &pecl.info; + &url.pecl.package;svm + + +
+ + + diff --git a/reference/svm/entities.svm.xml b/reference/svm/entities.svm.xml new file mode 100644 index 0000000000..517e7b82c7 --- /dev/null +++ b/reference/svm/entities.svm.xml @@ -0,0 +1,5 @@ +&reference.svm.svm.construct; +&reference.svm.svm.crossvalidate; +&reference.svm.svm.getoptions; +&reference.svm.svm.setoptions; +&reference.svm.svm.train; diff --git a/reference/svm/entities.svmmodel.xml b/reference/svm/entities.svmmodel.xml new file mode 100644 index 0000000000..c3d4b28483 --- /dev/null +++ b/reference/svm/entities.svmmodel.xml @@ -0,0 +1,4 @@ +&reference.svm.svmmodel.construct; +&reference.svm.svmmodel.load; +&reference.svm.svmmodel.predict; +&reference.svm.svmmodel.save; diff --git a/reference/svm/examples.xml b/reference/svm/examples.xml new file mode 100644 index 0000000000..b48c8c248a --- /dev/null +++ b/reference/svm/examples.xml @@ -0,0 +1,93 @@ + + + + + &reftitle.examples; + + + The basic process is to define parameters, supply training data to generate a model on, then make predictions based on the model. There is a default set of parameters that should get some results with almost any input, so we'll start by looking at the data. + + + Data is supplied in either a file, a stream, or as an array. If supplied in a file or a stream, it must contain one line per training example, which must be formatted as an integer class (usually 1 and -1) followed by a series of feature/value pairs, in increasing feature order. The features are integers, the values floats, usually scaled 0-1. For example: + + + -1 1:0.43 3:0.12 9284:0.2 + + + In a document classification problem, say a spam checker, each line would represent a document. There would be two classes, -1 for spam, 1 for ham. Each feature would represent some word, and the value would represent the importance of that word to the document (perhaps the frequency count, with the total scaled to unit length). Features that were 0 (e.g. 
the word did not appear in the document at all) would simply not be included. + + + In array mode, the data must be passed as an array of arrays. Each sub-array must have the class as the first element, then key => value pairs for the feature values. + + + This data is passed to the SVM class's train function, which will return an SVM model if successful. + + + Once a model has been generated, it can be used to make predictions about previously unseen data. This can be passed as an array to the model's predict function, in the same format as before, but without the label. The response will be the class. + + + Models can be saved and restored as required, using the save and load functions, which both take a file location. + + + + Train from array + + 0.43, 3 => 0.12, 9284 => 0.2), + array(1, 1 => 0.22, 5 => 0.01, 94 => 0.11), +); + +$svm = new SVM(); +$model = $svm->train($data); + +$data = array(1 => 0.43, 3 => 0.12, 9284 => 0.2); +$result = $model->predict($data); +var_dump($result); +$model->save('model.svm'); +?> +]]> + + &example.outputs.similar; + + + + + + Train from a file + +train("traindata.txt"); +?> +]]> + + + + + + + diff --git a/reference/svm/setup.xml b/reference/svm/setup.xml new file mode 100644 index 0000000000..12fe605331 --- /dev/null +++ b/reference/svm/setup.xml @@ -0,0 +1,72 @@ + + + + + &reftitle.setup; + +
+ &reftitle.required; + + Libsvm itself is required, and is available through some package managers - libsvm-devel for RPM-based systems or libsvm-dev for Debian-based ones. Alternatively it is available directly from the website. If installing from the official website then some steps will need to be taken as the package does not install automatically. For example, assuming the latest version is 3.1: + + + + + + + This last step should show libsvm is installed. + +
+ +
+ &reftitle.install; + + &pecl.info; + &url.pecl.package;svm + +
+ +
+ &reftitle.runtime; + &no.config; +
+ +
+ &reftitle.resources; + &no.resource; + + + +
+ +
+ + diff --git a/reference/svm/svm.xml b/reference/svm/svm.xml new file mode 100644 index 0000000000..19eccc8105 --- /dev/null +++ b/reference/svm/svm.xml @@ -0,0 +1,366 @@ + + + + + + The SVM class + SVM + + + + +
+ &reftitle.intro; + + + +
+ + +
+ &reftitle.classsynopsis; + + + + SVM + + + + + SVM + + + + Constants + + const + integer + SVM::C_SVC + 0 + + + const + integer + SVM::NU_SVC + 1 + + + const + integer + SVM::ONE_CLASS + 2 + + + const + integer + SVM::EPSILON_SVR + 3 + + + const + integer + SVM::NU_SVR + 4 + + + const + integer + SVM::KERNEL_LINEAR + 0 + + + const + integer + SVM::KERNEL_POLY + 1 + + + const + integer + SVM::KERNEL_RBF + 2 + + + const + integer + SVM::KERNEL_SIGMOID + 3 + + + const + integer + SVM::KERNEL_PRECOMPUTED + 4 + + + const + integer + SVM::OPT_TYPE + 101 + + + const + integer + SVM::OPT_KERNEL_TYPE + 102 + + + const + integer + SVM::OPT_DEGREE + 103 + + + const + integer + SVM::OPT_SHRINKING + 104 + + + const + integer + SVM::OPT_PROPABILITY + 105 + + + const + integer + SVM::OPT_GAMMA + 201 + + + const + integer + SVM::OPT_NU + 202 + + + const + integer + SVM::OPT_EPS + 203 + + + const + integer + SVM::OPT_P + 204 + + + const + integer + SVM::OPT_COEF_ZERO + 205 + + + const + integer + SVM::OPT_C + 206 + + + const + integer + SVM::OPT_CACHE_SIZE + 207 + + + Methods + + + + +
+ + +
+ &reftitle.constants; +
+ SVM Constants + + + + SVM::C_SVC + + The basic C_SVC SVM type. The default, and a good starting point + + + + + SVM::NU_SVC + + The NU_SVC type uses a different, more flexible, error weighting + + + + + SVM::ONE_CLASS + + One class SVM type. Train just on a single class, using outliers as negative examples + + + + + SVM::EPSILON_SVR + + A SVM type for regression (predicting a value rather than just a class) + + + + + SVM::NU_SVR + + A NU style SVM regression type + + + + + SVM::KERNEL_LINEAR + + A very simple kernel, can work well on large document classification problems + + + + + SVM::KERNEL_POLY + + A polynomial kernel + + + + + SVM::KERNEL_RBF + + The common Gaussian RBF kernel. Handles non-linear problems well and is a good default for classification + + + + + SVM::KERNEL_SIGMOID + + A kernel based on the sigmoid function. Using this makes the SVM very similar to a two layer sigmoid based neural network + + + + + SVM::KERNEL_PRECOMPUTED + + A precomputed kernel - currently unsupported. + + + + + SVM::OPT_TYPE + + The options key for the SVM type + + + + + SVM::OPT_KERNEL_TYPE + + The options key for the kernel type + + + + + SVM::OPT_DEGREE + + + + + + + SVM::OPT_SHRINKING + + Training parameter, boolean, for whether to use the shrinking heuristics + + + + + SVM::OPT_PROBABILITY + + Training parameter, boolean, for whether to use probability estimates + + + + + SVM::OPT_GAMMA + + Algorithm parameter for Poly, RBF and Sigmoid kernel types. + + + + + SVM::OPT_NU + + The option key for the nu parameter, only used in the NU_ SVM types + + + + + SVM::OPT_EPS + + The option key for the Epsilon parameter, used in epsilon regression + + + + + SVM::OPT_P + + Training parameter used by Epsilon SVR regression + + + + + SVM::OPT_COEF_ZERO + + Algorithm parameter for poly and sigmoid kernels + + + + + SVM::OPT_C + + The option for the cost parameter that controls tradeoff between errors and generality + + + + + SVM::OPT_CACHE_SIZE + + Memory cache size, in MB + + + +
+
+ + + +
+ + &reference.svm.entities.svm; + +
+ + diff --git a/reference/svm/svm/construct.xml b/reference/svm/svm/construct.xml new file mode 100644 index 0000000000..d4b9cb58db --- /dev/null +++ b/reference/svm/svm/construct.xml @@ -0,0 +1,56 @@ + + + + + + SVM::__construct + Construct a new SVM object + + + + &reftitle.description; + + SVM::__construct + + + + Constructs a new SVM object ready to accept training data. + + + + + + &reftitle.parameters; + &no.function.parameters; + + + + &reftitle.returnvalues; + + Throws SVMException if the libsvm library could not be loaded + + + + + + + diff --git a/reference/svm/svm/crossvalidate.xml b/reference/svm/svm/crossvalidate.xml new file mode 100644 index 0000000000..9674f5a403 --- /dev/null +++ b/reference/svm/svm/crossvalidate.xml @@ -0,0 +1,86 @@ + + + + + + SVM::crossvalidate + Test training params on subsets of the training data. + + + + &reftitle.description; + + public floatsvm::crossvalidate + arrayproblem + intnumber_of_folds + + + Crossvalidate can be used to test the effectiveness of the current parameter set on a subset of the training data. Given a problem set and a number n of "folds", it separates the problem set into n subsets, and then repeatedly trains on one subset and tests on another. While the accuracy will generally be lower than a SVM trained on the entire data set, the accuracy score returned should be relatively useful, so it can be used to test different training parameters. + + + + + + &reftitle.parameters; + + + + problem + + + The problem data. This can either be in the form of an array, the URL of an SVMLight formatted file, or a stream to an opened SVMLight formatted datasource. + + + + + number_of_folds + + + The number of sets the data should be divided into and cross tested. A higher number means smaller training sets and less reliability. 5 is a good number to start with. + + + + + + + + + &reftitle.returnvalues; + + The correct percentage, expressed as a floating point number from 0-1. 
In the case of NU_SVC or EPSILON_SVR SVM types the mean squared error will be returned instead. + + + + + &reftitle.seealso; + + + SVM::train + + + + + + + + + diff --git a/reference/svm/svm/getoptions.xml b/reference/svm/svm/getoptions.xml new file mode 100644 index 0000000000..19402c7d3c --- /dev/null +++ b/reference/svm/svm/getoptions.xml @@ -0,0 +1,56 @@ + + + + + + SVM::getOptions + Return the current training parameters + + + + &reftitle.description; + + public arraySVM::getOptions + + + + Retrieve an array containing the training parameters. The parameters will be keyed on the predefined SVM constants. + + + + + + &reftitle.parameters; + &no.function.parameters; + + + + &reftitle.returnvalues; + + Returns an array of configuration settings. + + + + + + + diff --git a/reference/svm/svm/setoptions.xml b/reference/svm/svm/setoptions.xml new file mode 100644 index 0000000000..b871759445 --- /dev/null +++ b/reference/svm/svm/setoptions.xml @@ -0,0 +1,65 @@ + + + + + + SVM::setOptions + Set training parameters + + + + &reftitle.description; + + public booleanSVM::setOptions + arrayparams + + + Set one or more training parameters. + + + + + + &reftitle.parameters; + + + + params + + + An array of training parameters, keyed on the SVM constants. + + + + + + + + + &reftitle.returnvalues; + + Returns true on success; throws SVMException on error. + + + + + diff --git a/reference/svm/svm/train.xml b/reference/svm/svm/train.xml new file mode 100644 index 0000000000..0e461f1822 --- /dev/null +++ b/reference/svm/svm/train.xml @@ -0,0 +1,80 @@ + + + + + + SVM::train + Create an SVMModel based on training data + + + + &reftitle.description; + + public SVMModelsvm::train + arrayproblem + arrayweights + + + Train a support vector machine based on the supplied training data. + + + + + + &reftitle.parameters; + + + + problem + + + The problem can be provided in three different ways. 
+ An array, where the data should start with the class label (usually 1 or -1) then followed by a sparse data set of dimension => data pairs. + A URL to a file containing a SVM Light formatted problem, with each line being a new training example, the start of each line containing the class (1, -1) then a series of tab separated data values shown as key:value. + An opened stream pointing to a data source formatted as in the file above. + + + + + weights + + + Weights are an optional set of weighting parameters for the different classes, to help account for unbalanced training sets. For example, if the classes were 1 and -1, and -1 had significantly more examples than 1, the weight for -1 could be 0.5. Weights should be in the range 0-1. + + + + + + + + + &reftitle.returnvalues; + + Returns an SVMModel that can be used to classify previously unseen data. + Throws SVMException on error + + + + + + + diff --git a/reference/svm/svmexception.xml b/reference/svm/svmexception.xml new file mode 100644 index 0000000000..0ce91b7918 --- /dev/null +++ b/reference/svm/svmexception.xml @@ -0,0 +1,112 @@ + + + + + + The SVMException class + SVMException + + + + +
+ &reftitle.intro; + + The exception object thrown on errors from the SVM and SVMModel classes. + +
+ + +
+ &reftitle.classsynopsis; + + + + SVMException + + + + + SVMException + + + + extends + Exception + + + + Properties + + + Methods + + + Inherited methods + + + + + +
+ + + +
+ &reftitle.properties; + + + message + + + + + + code + + + + + + file + + + + + + line + + + + + +
+ + + +
+ + &reference.svm.entities.svmexception; + +
+ + diff --git a/reference/svm/svmmodel.xml b/reference/svm/svmmodel.xml new file mode 100644 index 0000000000..d0fcb39519 --- /dev/null +++ b/reference/svm/svmmodel.xml @@ -0,0 +1,67 @@ + + + + + + The SVMModel class + SVMModel + + + + +
+ &reftitle.intro; + + The SVMModel is the end result of the training process. It can be used to classify previously unseen data. + +
+ + +
+ &reftitle.classsynopsis; + + + + SVMModel + + + + + SVMModel + + + + + Methods + + + + +
+ +
+ + &reference.svm.entities.svmmodel; + +
+ + diff --git a/reference/svm/svmmodel/construct.xml b/reference/svm/svmmodel/construct.xml new file mode 100644 index 0000000000..80a6235a83 --- /dev/null +++ b/reference/svm/svmmodel/construct.xml @@ -0,0 +1,75 @@ + + + + + + SVMModel::__construct + Construct a new SVMModel + + + + &reftitle.description; + + SVMModel::__construct + stringfilename + + + Build a new SVMModel. Models will usually be created from the SVM::train function, but then saved models may be restored directly. + + + + + + &reftitle.parameters; + + + + filename + + + The filename for the saved model file this model should load. + + + + + + + + + &reftitle.returnvalues; + + Throws SVMException on error + + + + + &reftitle.seealso; + + + SVMModel::load + + + + + + + diff --git a/reference/svm/svmmodel/load.xml b/reference/svm/svmmodel/load.xml new file mode 100644 index 0000000000..d5d695d7fb --- /dev/null +++ b/reference/svm/svmmodel/load.xml @@ -0,0 +1,76 @@ + + + + + + SVMModel::load + Load a saved SVM Model + + + + &reftitle.description; + + public booleanSVMModel::load + stringfilename + + + Load a model file ready for classification or regression. + + + + + + &reftitle.parameters; + + + + filename + + + The filename of the model. + + + + + + + + + &reftitle.returnvalues; + + Throws SVMException on error. + Returns true on success. + + + + + &reftitle.seealso; + + + SVMModel::save + + + + + + + diff --git a/reference/svm/svmmodel/predict.xml b/reference/svm/svmmodel/predict.xml new file mode 100644 index 0000000000..4a7e0bb1ce --- /dev/null +++ b/reference/svm/svmmodel/predict.xml @@ -0,0 +1,77 @@ + + + + + + SVMModel::predict + Predict a value for previously unseen data + + + + &reftitle.description; + + public floatSVMModel::predict + arraydata + + + This function accepts an array of data and attempts to predict the class or regression value based on the model extracted from previously trained data. + + + + + + &reftitle.parameters; + + + + data + + + The array to be classified. 
This should be a series of key => value pairs in increasing key order, but not necessarily continuous. + + + + + + + + + &reftitle.returnvalues; + + Float the predicted value. This will be a class label in the case of classification, a real value in the case of regression. + Throws SVMException on error + + + + + &reftitle.seealso; + + + SVM::train + + + + + + + + diff --git a/reference/svm/svmmodel/save.xml b/reference/svm/svmmodel/save.xml new file mode 100644 index 0000000000..d5809be7e8 --- /dev/null +++ b/reference/svm/svmmodel/save.xml @@ -0,0 +1,76 @@ + + + + + + SVMModel::save + Save a model to a file + + + + &reftitle.description; + + public booleanSVMModel::save + stringfilename + + + Save the model data to a file, for later use. + + + + + + &reftitle.parameters; + + + + filename + + + The file to save the model to. + + + + + + + + + &reftitle.returnvalues; + + Throws SVMException on error. + Returns true on success. + + + + + &reftitle.seealso; + + + SVMModel::load + + + + + + + diff --git a/reference/svm/versions.xml b/reference/svm/versions.xml new file mode 100644 index 0000000000..1b1ec2b408 --- /dev/null +++ b/reference/svm/versions.xml @@ -0,0 +1,48 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + +