From fa054062acfa6e9c1a68efae97c564057d738753 Mon Sep 17 00:00:00 2001
From: Mira <86979912+mira-miracoli@users.noreply.github.com>
Date: Tue, 16 Jan 2024 09:18:47 +0100
Subject: [PATCH 1/6] Update histogram.py to Python 3

---
 data_hacks/histogram.py | 37 ++++++++++++++++---------------------
 1 file changed, 16 insertions(+), 21 deletions(-)

diff --git a/data_hacks/histogram.py b/data_hacks/histogram.py
index 3d16cc8..1dbd509 100755
--- a/data_hacks/histogram.py
+++ b/data_hacks/histogram.py
@@ -73,9 +73,9 @@ def test_mvsd():
     for x in range(10):
         mvsd.add(x)
 
-    assert '%.2f' % mvsd.mean() == "4.50"
-    assert '%.2f' % mvsd.var() == "8.25"
-    assert '%.14f' % mvsd.sd() == "2.87228132326901"
+    assert f"{mvsd.mean().2f}" == "4.50"
+    assert f"{mvsd.var().2f}" == "8.25"
+    assert f"{mvsd.sd().14f}" == "2.87228132326901"
 
 
 def load_stream(input_stream, agg_value_key, agg_key_value):
@@ -96,8 +96,8 @@ def load_stream(input_stream, agg_value_key, agg_key_value):
             else:
                 yield DataPoint(Decimal(clean_line), 1)
         except:
-            logging.exception('failed %r', line)
-            print >>sys.stderr, "invalid line %r" % line
+            logging.exception(f"failed {line:r}")
+            print >>sys.stderr, f"invalid line {line:r}"
 
 
 def median(values, key=None):
@@ -118,7 +118,7 @@ def test_median():
     assert 6 == median([8, 7, 9, 1, 2, 6, 3])  # odd-sized list
     assert 4 == median([4, 5, 2, 1, 9, 10])  # even-sized int list. (4+5)/2 = 4
     # even-sized float list. (4.0+5)/2 = 4.5
-    assert "4.50" == "%.2f" % median([4.0, 5, 2, 1, 9, 10])
+    assert "4.50" == f"{median([4.0, 5, 2, 1, 9, 10]):.2f}"
 
 
 def histogram(stream, options):
@@ -147,7 +147,7 @@ def histogram(stream, options):
         max_v = max_v.value
 
     if not max_v > min_v:
-        raise ValueError('max must be > min. max:%s min:%s' % (max_v, min_v))
+        raise ValueError(f"max must be > min. max:{max_v} min:{min_v}")
     diff = max_v - min_v
 
     boundaries = []
@@ -232,20 +232,16 @@ def log_steps(k, n):
     if max(bucket_counts) > 75:
         bucket_scale = int(max(bucket_counts) / 75)
 
-    print("# NumSamples = %d; Min = %0.2f; Max = %0.2f" %
-          (samples, min_v, max_v))
+    print(f"# NumSamples = {samples}; Min = {min_v:.2f}; Max = {max_v:.2f}")
     if skipped:
-        print("# %d value%s outside of min/max" %
-              (skipped, skipped > 1 and 's' or ''))
+        print(f"# {skipped} value{skipped > 1 and 's' or ''} outside of min/max")
     if options.mvsd:
-        print("# Mean = %f; Variance = %f; SD = %f; Median %f" %
-              (mvsd.mean(), mvsd.var(), mvsd.sd(),
-               median(accepted_data, key=lambda x: x.value)))
-    print "# each " + options.dot + " represents a count of %d" % bucket_scale
+        print(f"# Mean = {mvsd.mean()}; Variance = {mvsd.var()}; SD = {mvsd.sd()}; Median {median(accepted_data, key=lambda x: x.value)}")
+    print(f"# each {options.dot} represents a count of {bucket_scale}")
     bucket_min = min_v
     bucket_max = min_v
     percentage = ""
-    format_string = options.format + ' - ' + options.format + ' [%6d]: %s%s'
+    format_string = 
     for bucket in range(buckets):
         bucket_min = bucket_max
         bucket_max = boundaries[bucket]
@@ -254,10 +250,9 @@ def log_steps(k, n):
         if bucket_count:
             star_count = bucket_count / bucket_scale
         if options.percentage:
-            percentage = " (%0.2f%%)" % (100 * Decimal(bucket_count) /
-                                         Decimal(samples))
-        print format_string % (bucket_min, bucket_max, bucket_count, options.dot *
-                               star_count, percentage)
+            percentage = f" {(100 * Decimal(bucket_count) /
+                                         Decimal(samples)):.2f}"
+        print f"{bucket_min:{options.format}} - {bucket_max:{options.format}} [{bucket_count:6d}]: {options.dot * star_count}{percentage}"
 
 
 if __name__ == "__main__":
@@ -284,7 +279,7 @@ def log_steps(k, n):
     parser.add_option("--no-mvsd", dest="mvsd", action="store_false",
                       default=True, help="Disable the calculation of Mean, " +
                       "Variance and SD (improves performance)")
-    parser.add_option("-f", "--bucket-format", dest="format", default="%10.4f",
+    parser.add_option("-f", "--bucket-format", dest="format", default="10.4f",
                       help="format for bucket numbers")
     parser.add_option("-p", "--percentage", dest="percentage", default=False,
                       action="store_true", help="List percentage for each bar")

From 0cb0ee53333556f7b1046c72dbe12f0c2e030a13 Mon Sep 17 00:00:00 2001
From: Mira <86979912+mira-miracoli@users.noreply.github.com>
Date: Tue, 16 Jan 2024 09:22:03 +0100
Subject: [PATCH 2/6] missed variable

---
 data_hacks/histogram.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/data_hacks/histogram.py b/data_hacks/histogram.py
index 1dbd509..92a9edc 100755
--- a/data_hacks/histogram.py
+++ b/data_hacks/histogram.py
@@ -241,7 +241,6 @@ def log_steps(k, n):
     bucket_min = min_v
     bucket_max = min_v
     percentage = ""
-    format_string = 
     for bucket in range(buckets):
         bucket_min = bucket_max
         bucket_max = boundaries[bucket]

From b2e036d719c885f4abbf97a1e469a142f200a77c Mon Sep 17 00:00:00 2001
From: Mira <86979912+mira-miracoli@users.noreply.github.com>
Date: Tue, 16 Jan 2024 09:25:17 +0100
Subject: [PATCH 3/6] rm linebreak

---
 data_hacks/histogram.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/data_hacks/histogram.py b/data_hacks/histogram.py
index 92a9edc..5416001 100755
--- a/data_hacks/histogram.py
+++ b/data_hacks/histogram.py
@@ -249,8 +249,7 @@ def log_steps(k, n):
         if bucket_count:
             star_count = bucket_count / bucket_scale
         if options.percentage:
-            percentage = f" {(100 * Decimal(bucket_count) /
-                                         Decimal(samples)):.2f}"
+            percentage = f" {(100 * Decimal(bucket_count) / Decimal(samples)):.2f}"
         print f"{bucket_min:{options.format}} - {bucket_max:{options.format}} [{bucket_count:6d}]: {options.dot * star_count}{percentage}"
 
 

From 13679bec791627ff29b3b1cb103afd18dd367b55 Mon Sep 17 00:00:00 2001
From: Mira <86979912+mira-miracoli@users.noreply.github.com>
Date: Tue, 16 Jan 2024 09:27:09 +0100
Subject: [PATCH 4/6] print needs ()

---
 data_hacks/histogram.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/data_hacks/histogram.py b/data_hacks/histogram.py
index 5416001..02cf98d 100755
--- a/data_hacks/histogram.py
+++ b/data_hacks/histogram.py
@@ -97,7 +97,7 @@ def load_stream(input_stream, agg_value_key, agg_key_value):
                 yield DataPoint(Decimal(clean_line), 1)
         except:
             logging.exception(f"failed {line:r}")
-            print >>sys.stderr, f"invalid line {line:r}"
+            print(f"invalid line {line:r}", sys.stderr)
 
 
 def median(values, key=None):
@@ -250,7 +250,7 @@ def log_steps(k, n):
             star_count = bucket_count / bucket_scale
         if options.percentage:
             percentage = f" {(100 * Decimal(bucket_count) / Decimal(samples)):.2f}"
-        print f"{bucket_min:{options.format}} - {bucket_max:{options.format}} [{bucket_count:6d}]: {options.dot * star_count}{percentage}"
+        print(f"{bucket_min:{options.format}} - {bucket_max:{options.format}} [{bucket_count:6d}]: {options.dot * star_count}{percentage}")
 
 
 if __name__ == "__main__":
@@ -287,7 +287,7 @@ def log_steps(k, n):
     if sys.stdin.isatty():
         # if isatty() that means it's run without anything piped into it
         parser.print_usage()
-        print "for more help use --help"
+        print("for more help use --help")
         sys.exit(1)
     histogram(load_stream(sys.stdin, options.agg_value_key,
                           options.agg_key_value), options)

From 24b954fbaedc5be30c0dcda2b77ffad7a3a03b7b Mon Sep 17 00:00:00 2001
From: Mira <86979912+mira-miracoli@users.noreply.github.com>
Date: Tue, 16 Jan 2024 09:29:47 +0100
Subject: [PATCH 5/6] should use proper IDE not github

---
 data_hacks/histogram.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/data_hacks/histogram.py b/data_hacks/histogram.py
index 02cf98d..cf1c6d2 100755
--- a/data_hacks/histogram.py
+++ b/data_hacks/histogram.py
@@ -73,9 +73,9 @@ def test_mvsd():
     for x in range(10):
         mvsd.add(x)
 
-    assert f"{mvsd.mean().2f}" == "4.50"
-    assert f"{mvsd.var().2f}" == "8.25"
-    assert f"{mvsd.sd().14f}" == "2.87228132326901"
+    assert f"{mvsd.mean():.2f}" == "4.50"
+    assert f"{mvsd.var():.2f}" == "8.25"
+    assert f"{mvsd.sd():.14f}" == "2.87228132326901"
 
 
 def load_stream(input_stream, agg_value_key, agg_key_value):

From 66089bb4fd9db439754d83425a6455f60213896f Mon Sep 17 00:00:00 2001
From: Mira <86979912+mira-miracoli@users.noreply.github.com>
Date: Tue, 16 Jan 2024 10:55:10 +0100
Subject: [PATCH 6/6] Cast indices and star count to int; drop invalid :r format spec

---
 data_hacks/histogram.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/data_hacks/histogram.py b/data_hacks/histogram.py
index cf1c6d2..75960b6 100755
--- a/data_hacks/histogram.py
+++ b/data_hacks/histogram.py
@@ -96,8 +96,8 @@ def load_stream(input_stream, agg_value_key, agg_key_value):
             else:
                 yield DataPoint(Decimal(clean_line), 1)
         except:
-            logging.exception(f"failed {line:r}")
-            print(f"invalid line {line:r}", sys.stderr)
+            logging.exception("failed %r", line)
+            print(f"invalid line {line!r}", file=sys.stderr)
 
 
 def median(values, key=None):
@@ -111,7 +111,7 @@ def median(values, key=None):
 
     values = sorted(values, key=key)
     return sum(map(key,
-                   [values[i] for i in median_indeces])) / len(median_indeces)
+                   [values[int(i)] for i in median_indeces])) / len(median_indeces)
 
 
 def test_median():
@@ -250,7 +250,7 @@ def log_steps(k, n):
             star_count = bucket_count / bucket_scale
         if options.percentage:
             percentage = f" {(100 * Decimal(bucket_count) / Decimal(samples)):.2f}"
-        print(f"{bucket_min:{options.format}} - {bucket_max:{options.format}} [{bucket_count:6d}]: {options.dot * star_count}{percentage}")
+        print(f"{bucket_min:{options.format}} - {bucket_max:{options.format}} [{bucket_count:6d}]: {options.dot * int(star_count)}{percentage}")
 
 
 if __name__ == "__main__":