API: Add nearest neighbor 59/1059/2
author Tasuku Suzuki <stasuku@gmail.com>
Sun, 17 Dec 2017 15:54:37 +0000 (00:54 +0900)
committer stasuku <stasuku@gmail.com>
Sun, 17 Dec 2017 15:56:51 +0000 (00:56 +0900)
Change-Id: I8a0e37fef38eff81ea6c91eb97b558259230c580
Reviewed-on: http://codereview.qt-users.jp/1059
Reviewed-by: <stasuku@gmail.com>
Tested-by: <stasuku@gmail.com>
examples/jubatus/cpp/cpp.pro
examples/jubatus/cpp/nearestneighbor/AAAI/AAAI.json [new file with mode: 0644]
examples/jubatus/cpp/nearestneighbor/AAAI/AAAI.pro [new file with mode: 0644]
examples/jubatus/cpp/nearestneighbor/AAAI/main.cpp [new file with mode: 0644]
examples/jubatus/cpp/nearestneighbor/nearestneighbor.pro [new file with mode: 0644]
src/jubatus/jubatus.pro
src/jubatus/nearestneighbor/nearestneighbor.pri [new file with mode: 0644]
src/jubatus/nearestneighbor/qjubatusnearestneighbor.cpp [new file with mode: 0644]
src/jubatus/nearestneighbor/qjubatusnearestneighbor.h [new file with mode: 0644]

index 0e570c2..cd6aab1 100644 (file)
@@ -1,2 +1,2 @@
 TEMPLATE = subdirs
-SUBDIRS += anomaly burst classifier recommender regression
+SUBDIRS += anomaly burst classifier nearestneighbor recommender regression
diff --git a/examples/jubatus/cpp/nearestneighbor/AAAI/AAAI.json b/examples/jubatus/cpp/nearestneighbor/AAAI/AAAI.json
new file mode 100644 (file)
index 0000000..578a7e7
--- /dev/null
@@ -0,0 +1,21 @@
+{
+  "converter" : {
+    "string_filter_types": {},
+    "string_filter_rules":[],
+    "num_filter_types": {},
+    "num_filter_rules": [],
+    "string_types": {
+    },
+    "string_rules":[
+      { "key" : "*", "type" : "space", "sample_weight" : "tf", "global_weight" : "bm25" }
+    ],
+    "num_types": {},
+    "num_rules": [
+      {"key" : "*", "type" : "num"}
+    ]
+  },
+  "parameter" : {
+    "hash_num" : 512
+  },
+  "method": "lsh"
+}
diff --git a/examples/jubatus/cpp/nearestneighbor/AAAI/AAAI.pro b/examples/jubatus/cpp/nearestneighbor/AAAI/AAAI.pro
new file mode 100644 (file)
index 0000000..99c5cd3
--- /dev/null
@@ -0,0 +1,11 @@
+# Example application built from main.cpp in this directory.
+TEMPLATE = app
+TARGET = jubatus-AAAI
+# core and network are used for the download in main.cpp; jubatus provides
+# the QtJubatus classes it includes.
+QT = core network jubatus
+CONFIG += c++11
+SOURCES = main.cpp
+
+# Listed as a non-compiled project file.
+OTHER_FILES += AAAI.json
+
+# Install the binary below the Qt examples directory.
+target.path = $$[QT_INSTALL_EXAMPLES]/jubatus/cpp/nearestneighbor/AAAI
+INSTALLS += target
+
diff --git a/examples/jubatus/cpp/nearestneighbor/AAAI/main.cpp b/examples/jubatus/cpp/nearestneighbor/AAAI/main.cpp
new file mode 100644 (file)
index 0000000..a70c726
--- /dev/null
@@ -0,0 +1,100 @@
+#include <QtCore>
+#include <QtNetwork>
+#include <QtJubatus>
+
+/*
+ * Minimal in-memory CSV table: a list of records, each record being the list
+ * of its field strings. The whole document is parsed by the constructor.
+ */
+class SimpleCSV : public QList<QStringList>
+{
+public:
+    // explicit: a QString must never silently convert into a parsed table.
+    explicit SimpleCSV(const QString &source);
+};
+
+
+SimpleCSV::SimpleCSV(const QString &source)
+{
+    QLatin1String crlf("\n");
+    QLatin1Char quote('"');
+    QLatin1Char delimiter(',');
+
+    // 1. fix record
+    QStringList records;
+    QStringList lines = source.split(crlf);
+    bool next = false;
+    for (int i = 0; i < lines.length(); i++) {
+        const QString &line = lines.at(i);
+        if (next) {
+            records.last().append(crlf);
+            records.last().append(line);
+            next = (line.count(quote) % 2 == 0);
+        } else {
+            records.append(line);
+            next = (line.count(quote) % 2 == 1);
+        }
+    }
+
+    // 2. fix fields
+    for (const QString &record : records) {
+//        qDebug() << record;
+        QString r = record;
+        QStringList strings = r.replace(QStringLiteral("\"\""), QStringLiteral("\b")).split(delimiter);
+        QStringList fields;
+        bool next = false;
+        for (int i = 0; i < strings.length(); i++) {
+            QString string = strings.at(i);
+            if (next) {
+                fields.last().append(delimiter);
+                fields.last().append(string);
+                next = !string.endsWith(quote);
+            } else {
+                if (string == QStringLiteral("\b")) {
+                    fields.append(QStringLiteral(""));
+                } else if (string.startsWith(quote)) {
+                    fields.append(string);
+                    next = !string.endsWith(quote);
+                } else {
+                    fields.append(string);
+                }
+            }
+        }
+
+        QStringList data;
+        for (QString field : fields) {
+            field.replace(QStringLiteral("\b"), QStringLiteral("\""));
+            if (field.startsWith(quote) && field.endsWith(quote))
+                field = field.mid(1, field.length() - 2);
+            data.append(field);
+        }
+//        qDebug() << data;
+        append(data);
+    }
+}
+
+int main(int argc, char **argv)
+{
+    QCoreApplication app(argc, argv);
+
+    QNetworkAccessManager nam;
+    QNetworkRequest request(QUrl("https://archive.ics.uci.edu/ml/machine-learning-databases/00307/%5bUCI%5d%20AAAI-14%20Accepted%20Papers%20-%20Papers.csv"));
+    request.setAttribute(QNetworkRequest::SynchronousRequestAttribute, true);
+    QNetworkReply *reply = nam.get(request);
+
+    SimpleCSV csv(QString::fromUtf8(reply->readAll()));
+    csv.takeFirst();
+
+    QJubatusNearestNeighbor nn;
+    nn.clear();
+
+    int i = 0;
+    for (const QStringList &fields : csv) {
+        QString id = QString::number(i++);
+        QVariantMap data;
+        data.insert("title", fields.at(0));
+//        data.insert("abstract", fields.at(5));
+        nn.setRow(id, data);
+    }
+
+    for (const QJubatusNearestNeighbor::IdWithScore &iws : nn.similarRowFromId("1", 4)) {
+        qDebug() << csv.at(iws.id.toInt()).at(0) << iws.score;
+    }
+
+    return 0;
+}
diff --git a/examples/jubatus/cpp/nearestneighbor/nearestneighbor.pro b/examples/jubatus/cpp/nearestneighbor/nearestneighbor.pro
new file mode 100644 (file)
index 0000000..4522fda
--- /dev/null
@@ -0,0 +1,2 @@
+# Builds every nearest neighbor example sub-project.
+TEMPLATE = subdirs
+SUBDIRS += AAAI
index 31e5d14..e5e5f81 100644 (file)
@@ -18,6 +18,7 @@ include(./client/client.pri)
 include(./anomaly/anomaly.pri)
 include(./burst/burst.pri)
 include(./classifier/classifier.pri)
+include(./clustering/clustering.pri)
+include(./nearestneighbor/nearestneighbor.pri)
 include(./recommender/recommender.pri)
 include(./regression/regression.pri)
-include(./clustering/clustering.pri)
diff --git a/src/jubatus/nearestneighbor/nearestneighbor.pri b/src/jubatus/nearestneighbor/nearestneighbor.pri
new file mode 100644 (file)
index 0000000..beac96c
--- /dev/null
@@ -0,0 +1,5 @@
+# Sources of the nearest neighbor client; $$PWD is this file's directory.
+INCLUDEPATH += $$PWD
+DEPENDPATH += $$PWD
+
+HEADERS += $$PWD/qjubatusnearestneighbor.h
+SOURCES += $$PWD/qjubatusnearestneighbor.cpp
diff --git a/src/jubatus/nearestneighbor/qjubatusnearestneighbor.cpp b/src/jubatus/nearestneighbor/qjubatusnearestneighbor.cpp
new file mode 100644 (file)
index 0000000..c7f9bb0
--- /dev/null
@@ -0,0 +1,116 @@
+/* Copyright (c) 2012 Silk Project.
+ * All rights reserved.
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of the Silk nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL SILK BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "qjubatusnearestneighbor.h"
+
+#include <QtCore/QDebug>
+
+#include <vector>
+
+#include <jubatus/client/nearest_neighbor_client.hpp>
+
+// Constructs the client; parent is passed straight to QJubatusClient.
+QJubatusNearestNeighbor::QJubatusNearestNeighbor(QObject *parent)
+    : QJubatusClient(parent)
+{
+}
+
+// Forwards to the Jubatus client's clear() and returns what it reported.
+// The result stays false when the assignment inside EXEC_JUBATUS_COMMAND
+// does not complete (the macro is defined elsewhere; presumably it traps
+// client errors - TODO confirm).
+bool QJubatusNearestNeighbor::clear()
+{
+    bool ret = false;
+    EXEC_JUBATUS_COMMAND( ret = client()->clear(); )
+    return ret;
+}
+
+// Forwards to the Jubatus client's set_row() with id and data converted by
+// the inherited convert() overloads, and returns what the client reported.
+// The result stays false when the assignment inside EXEC_JUBATUS_COMMAND
+// does not complete (macro defined elsewhere - TODO confirm its error handling).
+bool QJubatusNearestNeighbor::setRow(const QString &id, const QVariantMap &data)
+{
+    bool ret = false;
+    EXEC_JUBATUS_COMMAND( ret = client()->set_row(convert(id), convert(data)); )
+    return ret;
+}
+
+// Forwards to the Jubatus client's neighbor_row_from_id() for the row named
+// id and returns the converted result list. size is passed through unchanged
+// (presumably the maximum number of results - confirm against the Jubatus API).
+// The list stays empty when the assignment inside EXEC_JUBATUS_COMMAND does
+// not complete.
+QList<QJubatusNearestNeighbor::IdWithScore> QJubatusNearestNeighbor::neighborRowFromId(const QString &id, uint size)
+{
+    QList<QJubatusNearestNeighbor::IdWithScore> ret;
+    EXEC_JUBATUS_COMMAND( ret = convert(client()->neighbor_row_from_id(convert(id), size)); )
+    return ret;
+}
+
+// Forwards to the Jubatus client's neighbor_row_from_datum() with query
+// converted by the inherited convert(), and returns the converted result list.
+// size is passed through unchanged (presumably the maximum number of
+// results - confirm against the Jubatus API). The list stays empty when the
+// assignment inside EXEC_JUBATUS_COMMAND does not complete.
+QList<QJubatusNearestNeighbor::IdWithScore> QJubatusNearestNeighbor::neighborRowFromDatum(const QVariantMap &query, uint size)
+{
+    QList<QJubatusNearestNeighbor::IdWithScore> ret;
+    EXEC_JUBATUS_COMMAND( ret = convert(client()->neighbor_row_from_datum(convert(query), size)); )
+    return ret;
+}
+
+// Forwards to the Jubatus client's similar_row_from_id() for the row named id
+// and returns the converted result list. size is passed through unchanged
+// (presumably the maximum number of results - confirm against the Jubatus API).
+// The list stays empty when the assignment inside EXEC_JUBATUS_COMMAND does
+// not complete.
+QList<QJubatusNearestNeighbor::IdWithScore> QJubatusNearestNeighbor::similarRowFromId(const QString &id, uint size)
+{
+    QList<QJubatusNearestNeighbor::IdWithScore> ret;
+    EXEC_JUBATUS_COMMAND( ret = convert(client()->similar_row_from_id(convert(id), size)); )
+    return ret;
+}
+// Forwards to the Jubatus client's similar_row_from_datum() with query
+// converted by the inherited convert(), and returns the converted result list.
+// size is passed through unchanged (presumably the maximum number of
+// results - confirm against the Jubatus API). The list stays empty when the
+// assignment inside EXEC_JUBATUS_COMMAND does not complete.
+QList<QJubatusNearestNeighbor::IdWithScore> QJubatusNearestNeighbor::similarRowFromDatum(const QVariantMap &query, uint size)
+{
+    QList<QJubatusNearestNeighbor::IdWithScore> ret;
+    EXEC_JUBATUS_COMMAND( ret = convert(client()->similar_row_from_datum(convert(query), size)); )
+    return ret;
+}
+
+// Forwards to the Jubatus client's get_all_rows() and returns the result
+// converted to a QStringList (presumably the ids of all stored rows -
+// confirm against the Jubatus API). The list stays empty when the assignment
+// inside EXEC_JUBATUS_COMMAND does not complete.
+QStringList QJubatusNearestNeighbor::getAllRows()
+{
+    QStringList ret;
+    EXEC_JUBATUS_COMMAND( ret = convert(client()->get_all_rows()); )
+    return ret;
+}
+
+// Qt -> Jubatus: copies the score and converts the id with the inherited
+// string convert().
+jubatus::nearest_neighbor::id_with_score QJubatusNearestNeighbor::convert(const QJubatusNearestNeighbor::IdWithScore &data) const
+{
+    jubatus::nearest_neighbor::id_with_score native;
+    native.id = convert(data.id);
+    native.score = data.score;
+    return native;
+}
+
+// Jubatus -> Qt: copies the score and converts the id with the inherited
+// string convert().
+QJubatusNearestNeighbor::IdWithScore QJubatusNearestNeighbor::convert(const jubatus::nearest_neighbor::id_with_score &data) const
+{
+    IdWithScore result;
+    result.id = convert(data.id);
+    result.score = data.score;
+    return result;
+}
+
+// Converts a vector of Jubatus id_with_score results into a QList of
+// IdWithScore, element by element and in the same order.
+QList<QJubatusNearestNeighbor::IdWithScore> QJubatusNearestNeighbor::convert(const std::vector<jubatus::nearest_neighbor::id_with_score> &data) const
+{
+    QList<QJubatusNearestNeighbor::IdWithScore> ret;
+    ret.reserve(static_cast<int>(data.size()));
+    // Iterate by const reference: foreach would first copy the whole
+    // std::vector and then copy every element again into the loop variable.
+    for (const jubatus::nearest_neighbor::id_with_score &item : data) {
+        ret.append(convert(item));
+    }
+    return ret;
+}
+
+// Returns what QJubatusClient::client<T>() yields for the nearest neighbor
+// client type.
+jubatus::nearest_neighbor::client::nearest_neighbor *QJubatusNearestNeighbor::client()
+{
+    typedef jubatus::nearest_neighbor::client::nearest_neighbor NativeClient;
+    return QJubatusClient::client<NativeClient>();
+}
diff --git a/src/jubatus/nearestneighbor/qjubatusnearestneighbor.h b/src/jubatus/nearestneighbor/qjubatusnearestneighbor.h
new file mode 100644 (file)
index 0000000..65d6bee
--- /dev/null
@@ -0,0 +1,74 @@
+/* Copyright (c) 2012 Silk Project.
+ * All rights reserved.
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of the Silk nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL SILK BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef QJUBATUSNEAREST_NEIGHBOR_H
+#define QJUBATUSNEAREST_NEIGHBOR_H
+
+#include "jubatus_global.h"
+#include "qjubatusclient.h"
+
+#include <QtCore/QVariant>
+
+// Forward declarations of the Jubatus types named in the class below, so this
+// header does not include the Jubatus client headers itself.
+namespace jubatus {
+    namespace nearest_neighbor {
+        struct id_with_score;
+        namespace client {
+            class nearest_neighbor;
+        }
+    }
+}
+
+// Qt wrapper around the Jubatus nearest neighbor client. Each public
+// operation is implemented in qjubatusnearestneighbor.cpp by forwarding to
+// the like-named call of jubatus::nearest_neighbor::client::nearest_neighbor.
+// NOTE(review): std::vector is named in a convert() overload below but this
+// header does not include <vector> itself - confirm an included header
+// provides it.
+class JUBATUS_EXPORT QJubatusNearestNeighbor : public QJubatusClient
+{
+    Q_OBJECT
+public:
+    explicit QJubatusNearestNeighbor(QObject *parent = nullptr);
+
+    // One result entry: a row id and the score reported for it.
+    struct IdWithScore {
+        IdWithScore() : score(0.0) {}
+        QString id;
+        float score;
+    };
+
+    // Forwards to the client's clear().
+    bool clear();
+    // Forwards to the client's set_row() for the row named id.
+    bool setRow(const QString &id, const QVariantMap &data);
+    // Queries by stored row id or by an ad-hoc datum. size is passed to the
+    // service unchanged (presumably the maximum number of results - confirm
+    // against the Jubatus API).
+    QList<IdWithScore> neighborRowFromId(const QString &id, uint size);
+    QList<IdWithScore> neighborRowFromDatum(const QVariantMap &query, uint size);
+    QList<IdWithScore> similarRowFromId(const QString &id, uint size);
+    QList<IdWithScore> similarRowFromDatum(const QVariantMap &query, uint size);
+    // Forwards to the client's get_all_rows().
+    QStringList getAllRows();
+
+protected:
+    // Keep the base class convert() overloads visible next to the ones added here.
+    using QJubatusClient::convert;
+    jubatus::nearest_neighbor::id_with_score convert(const IdWithScore &data) const;
+    IdWithScore convert(const jubatus::nearest_neighbor::id_with_score &data) const;
+    QList<IdWithScore> convert(const std::vector<jubatus::nearest_neighbor::id_with_score> &data) const;
+
+private:
+    // Typed access to the client object provided by QJubatusClient.
+    jubatus::nearest_neighbor::client::nearest_neighbor *client();
+};
+
+#endif // QJUBATUSNEAREST_NEIGHBOR_H