UpperConfidenceBound

Wei-1 · Wei-1 · commit 4fa3bd31a694 · 2020-03-08T12:45:12.000+08:00
diff --git a/README.md b/README.md
@@ -88,6 +88,8 @@ A very light weight Scala machine learning library that provide some basic ML al
 
 - [x] Epsilon Greedy Search [[Code]](src/main/scala/algorithm/optimization/EpsilonGreedy.scala) [[Usage]](src/test/scala/algorithm/optimization/EpsilonGreedyTest.scala)
 
+- [x] Upper Confidence Bound [[Code]](src/main/scala/algorithm/optimization/UpperConfidenceBound.scala) [[Usage]](src/test/scala/algorithm/optimization/UpperConfidenceBoundTest.scala)
+
 ### Reinforcement Learning :
 
 - [x] Naive Feedback [[Code]](src/main/scala/algorithm/reinforcement/NaiveFeedback.scala) [[Usage]](src/test/scala/algorithm/reinforcement/NaiveFeedbackTest.scala)
diff --git a/src/main/scala/algorithm/optimization/EpsilonGreedy.scala b/src/main/scala/algorithm/optimization/EpsilonGreedy.scala
@@ -16,7 +16,7 @@ class EpsilonGreedy {
         if (scores != null)
             currentScores = scores
         if (currentScores == null)
-            currentScores = new Array[Double](size)
+            currentScores = Array.fill[Double](size)(Double.MinValue)
         if (math.random < epsilon) {
             val randSelect = (math.random * size).toInt
             val value = evaluation(choices(randSelect))
diff --git a/src/main/scala/algorithm/optimization/UpperConfidenceBound.scala b/src/main/scala/algorithm/optimization/UpperConfidenceBound.scala
@@ -0,0 +1,39 @@
+// Wei Chen - Upper Confidence Bound
+// 2020-03-08
+
+package com.scalaml.algorithm
+
+// Upper Confidence Bound (UCB) search over a fixed set of choices.
+// Arm i keeps its running (mean reward, pull count) in currentStats(i).
+class UpperConfidenceBound {
+    // Per-arm (mean reward, pull count). Stays null until search() creates it
+    // or the caller assigns it.
+    var currentStats: Array[(Double, Int)] = null
+
+    // Returns the index of the arm with the largest upper confidence bound
+    //     mean + c * sqrt(ln(N + 1) / pulls)
+    // where N is the total number of pulls over all arms, so the exploration
+    // bonus of a neglected arm keeps growing as the other arms are pulled.
+    // The 1e-12 term avoids dividing by zero: a never-pulled arm gets a huge
+    // bonus as soon as N >= 1 and c > 0. Ties resolve to the lowest index.
+    def select(c: Double): Int = {
+        require(currentStats != null && currentStats.nonEmpty,
+            "currentStats must be initialised and non-empty")
+        // Sum as Long so very large pull counts cannot overflow Int.
+        val totalPulls = currentStats.map(_._2.toLong).sum
+        val logTotal = math.log(totalPulls + 1.0)
+        val bounds = currentStats.map { case (mean, pulls) =>
+            mean + c * math.sqrt(logTotal / (pulls + 1e-12))
+        }
+        bounds.indexOf(bounds.max)
+    }
+
+    // Folds one observed reward into arm i: updates the running mean and
+    // increments the pull count.
+    def add(i: Int, value: Double): Unit = {
+        require(currentStats != null, "currentStats must be initialised")
+        val (mean, pulls) = currentStats(i)
+        val updatedMean = (mean * pulls + value) / (pulls + 1)
+        currentStats(i) = (updatedMean, pulls + 1)
+    }
+
+    // Runs one UCB step: picks an arm, evaluates it, records the reward and
+    // returns the choice with the best mean reward observed so far.
+    //   evaluation - reward function applied to the selected choice
+    //   choices    - candidate arms; must be non-empty
+    //   scores     - optional per-arm (mean, pulls) to continue from; when
+    //                non-null it replaces currentStats and is updated in place
+    //   c          - exploration weight passed to select
+    // Throws IllegalArgumentException when choices is empty or the stats
+    // array does not have one entry per choice.
+    def search(
+        evaluation: Array[Double] => Double,
+        choices: Array[Array[Double]],
+        scores: Array[(Double, Int)] = null,
+        c: Double = 1
+    ): Array[Double] = {
+        require(choices.nonEmpty, "choices must not be empty")
+        if (scores != null)
+            currentStats = scores
+        if (currentStats == null)
+            currentStats = Array.fill[(Double, Int)](choices.length)((0.0, 0))
+        require(currentStats.length == choices.length,
+            "stats must contain exactly one entry per choice")
+        val selected = select(c)
+        add(selected, evaluation(choices(selected)))
+        // Only arms that were actually pulled compete for "best": the mean of
+        // a never-pulled arm is a placeholder and would beat negative rewards.
+        val tried = currentStats.indices.filter(i => currentStats(i)._2 > 0)
+        val candidates = if (tried.nonEmpty) tried else currentStats.indices
+        choices(candidates.maxBy(i => currentStats(i)._1))
+    }
+}
diff --git a/src/test/scala/algorithm/optimization/EpsilonGreedyTest.scala b/src/test/scala/algorithm/optimization/EpsilonGreedyTest.scala
@@ -20,11 +20,11 @@ class EpsilonGreedySuite extends AnyFunSuite {
     )
     val epsilon: Double = 0.1
 
-    test("GeneAlgorithm Test : Initial") {
+    test("EpsilonGreedy Test : Initial") {
         assert(eg.currentScores == null)
     }
 
-    test("GeneAlgorithm Test : Search - Start") {
+    test("EpsilonGreedy Test : Search - Start") {
         for (i <- 0 until 1000)
             eg.search(evaluation, choices, null, epsilon)
         assert(eg.currentScores.size == choices.size)
@@ -33,7 +33,7 @@ class EpsilonGreedySuite extends AnyFunSuite {
         assert((best.head - 0.7).abs < 0.05)
     }
 
-    test("GeneAlgorithm Test : Search - Continue") {
+    test("EpsilonGreedy Test : Search - Continue") {
         var scores: Array[Double] = Array(0, 0, 1 / 1.3, 0)
         for (i <- 0 until 1000) {
             eg.search(evaluation, choices, scores, epsilon)
diff --git a/src/test/scala/algorithm/optimization/UpperConfidenceBoundTest.scala b/src/test/scala/algorithm/optimization/UpperConfidenceBoundTest.scala
@@ -0,0 +1,53 @@
+// Wei Chen - Upper Confidence Bound Test
+// 2020-03-08
+
+import com.scalaml.general.MatrixFunc._
+import com.scalaml.algorithm.UpperConfidenceBound
+import org.scalatest.funsuite.AnyFunSuite
+
+// Checks UpperConfidenceBound on four one-element choices whose reward is
+// highest for the value 0.7.
+class UpperConfidenceBoundSuite extends AnyFunSuite {
+
+    val ucb = new UpperConfidenceBound()
+
+    // Reward is 1 when the first element equals 0.7 and shrinks with distance.
+    def evaluation(arr: Array[Double]): Double = {
+        val distance = (arr.head - 0.7).abs
+        1 / (distance + 1)
+    }
+
+    val choices: Array[Array[Double]] =
+        Array(Array(0.7), Array(0.8), Array(1.0), Array(0.5))
+    val c: Double = 1
+
+    test("UpperConfidenceBound Test : Initial") {
+        // No statistics exist before the first search.
+        assert(ucb.currentStats == null)
+    }
+
+    test("UpperConfidenceBound Test : Search - Start") {
+        (0 until 100).foreach { _ =>
+            ucb.search(evaluation, choices, null, c)
+        }
+        assert(ucb.currentStats.size == choices.size)
+
+        val winner = ucb.search(evaluation, choices, null, c)
+        assert((winner.head - 0.7).abs < 0.05)
+    }
+
+    test("UpperConfidenceBound Test : Search - Continue") {
+        // Resume from statistics in which only the third arm has been pulled.
+        var stats: Array[(Double, Int)] = Array[(Double, Int)](
+            (0.0, 0),
+            (0.0, 0),
+            (1 / 1.3, 1),
+            (0.0, 0)
+        )
+        (0 until 100).foreach { _ =>
+            ucb.search(evaluation, choices, stats, c)
+            stats = ucb.currentStats
+        }
+        assert(ucb.currentStats.size == stats.size)
+
+        val winner = ucb.search(evaluation, choices, stats, c)
+        assert((winner.head - 0.7).abs < 0.05)
+    }
+
+}

Original file line number	Diff line number	Diff line change
`@@ -20,11 +20,11 @@ class EpsilonGreedySuite extends AnyFunSuite {`
`20`	`20`	`)`
`21`	`21`	`val epsilon: Double = 0.1`
`22`	`22`
`23`		`- test("GeneAlgorithm Test : Initial") {`
	`23`	`+ test("EpsilonGreedy Test : Initial") {`
`24`	`24`	`assert(eg.currentScores == null)`
`25`	`25`	`}`
`26`	`26`
`27`		`- test("GeneAlgorithm Test : Search - Start") {`
	`27`	`+ test("EpsilonGreedy Test : Search - Start") {`
`28`	`28`	`for (i <- 0 until 1000)`
`29`	`29`	`eg.search(evaluation, choices, null, epsilon)`
`30`	`30`	`assert(eg.currentScores.size == choices.size)`
`@@ -33,7 +33,7 @@ class EpsilonGreedySuite extends AnyFunSuite {`
`33`	`33`	`assert((best.head - 0.7).abs < 0.05)`
`34`	`34`	`}`
`35`	`35`
`36`		`- test("GeneAlgorithm Test : Search - Continue") {`
	`36`	`+ test("EpsilonGreedy Test : Search - Continue") {`
`37`	`37`	`var scores: Array[Double] = Array(0, 0, 1 / 1.3, 0)`
`38`	`38`	`for (i <- 0 until 1000) {`
`39`	`39`	`eg.search(evaluation, choices, scores, epsilon)`