From fa054062acfa6e9c1a68efae97c564057d738753 Mon Sep 17 00:00:00 2001 From: Mira <86979912+mira-miracoli@users.noreply.github.com> Date: Tue, 16 Jan 2024 09:18:47 +0100 Subject: [PATCH 1/6] Update histogram.py to python 3 --- data_hacks/histogram.py | 37 ++++++++++++++++--------------------- 1 file changed, 16 insertions(+), 21 deletions(-) diff --git a/data_hacks/histogram.py b/data_hacks/histogram.py index 3d16cc8..1dbd509 100755 --- a/data_hacks/histogram.py +++ b/data_hacks/histogram.py @@ -73,9 +73,9 @@ def test_mvsd(): for x in range(10): mvsd.add(x) - assert '%.2f' % mvsd.mean() == "4.50" - assert '%.2f' % mvsd.var() == "8.25" - assert '%.14f' % mvsd.sd() == "2.87228132326901" + assert f"{mvsd.mean().2f}" == "4.50" + assert f"{mvsd.var().2f}" == "8.25" + assert f"{mvsd.sd().14f}" == "2.87228132326901" def load_stream(input_stream, agg_value_key, agg_key_value): @@ -96,8 +96,8 @@ def load_stream(input_stream, agg_value_key, agg_key_value): else: yield DataPoint(Decimal(clean_line), 1) except: - logging.exception('failed %r', line) - print >>sys.stderr, "invalid line %r" % line + logging.exception(f"failed {line:r}") + print >>sys.stderr, f"invalid line {line:r}" def median(values, key=None): @@ -118,7 +118,7 @@ def test_median(): assert 6 == median([8, 7, 9, 1, 2, 6, 3]) # odd-sized list assert 4 == median([4, 5, 2, 1, 9, 10]) # even-sized int list. (4+5)/2 = 4 # even-sized float list. (4.0+5)/2 = 4.5 - assert "4.50" == "%.2f" % median([4.0, 5, 2, 1, 9, 10]) + assert "4.50" == f"{median([4.0, 5, 2, 1, 9, 10]):.2f}" def histogram(stream, options): @@ -147,7 +147,7 @@ def histogram(stream, options): max_v = max_v.value if not max_v > min_v: - raise ValueError('max must be > min. max:%s min:%s' % (max_v, min_v)) + raise ValueError(f"max must be > min. max:{max_v} min:{min_v}") diff = max_v - min_v boundaries = [] @@ -232,20 +232,16 @@ def log_steps(k, n): if max(bucket_counts) > 75: bucket_scale = int(max(bucket_counts) / 75) - print("# NumSamples = %d; Min = %0.2f; Max = %0.2f" % - (samples, min_v, max_v)) + print(f"# NumSamples = {samples}; Min = {min_v:.2f}; Max = {max_v:.2f}") if skipped: - print("# %d value%s outside of min/max" % - (skipped, skipped > 1 and 's' or '')) + print(f"# {skipped} value{skipped > 1 and 's' or ''} outside of min/max") if options.mvsd: - print("# Mean = %f; Variance = %f; SD = %f; Median %f" % - (mvsd.mean(), mvsd.var(), mvsd.sd(), - median(accepted_data, key=lambda x: x.value))) - print "# each " + options.dot + " represents a count of %d" % bucket_scale + print(f"# Mean = {mvsd.mean()}; Variance = {mvsd.var()}; SD = {mvsd.sd()}; Median {median(accepted_data, key=lambda x: x.value)}") + print(f"# each {options.dot} represents a count of {bucket_scale}") bucket_min = min_v bucket_max = min_v percentage = "" - format_string = options.format + ' - ' + options.format + ' [%6d]: %s%s' + format_string = for bucket in range(buckets): bucket_min = bucket_max bucket_max = boundaries[bucket] @@ -254,10 +250,9 @@ def log_steps(k, n): if bucket_count: star_count = bucket_count / bucket_scale if options.percentage: - percentage = " (%0.2f%%)" % (100 * Decimal(bucket_count) / - Decimal(samples)) - print format_string % (bucket_min, bucket_max, bucket_count, options.dot * - star_count, percentage) + percentage = f" {(100 * Decimal(bucket_count) / + Decimal(samples)):.2f}" + print f"{bucket_min:{options.format}} - {bucket_max:{options.format}} [{bucket_count:6d}]: {options.dot * star_count}{percentage}" if __name__ == "__main__": @@ -284,7 +279,7 @@ def log_steps(k, n): parser.add_option("--no-mvsd", dest="mvsd", action="store_false", default=True, help="Disable the calculation of Mean, " + "Variance and SD (improves performance)") - parser.add_option("-f", "--bucket-format", dest="format", default="%10.4f", + parser.add_option("-f", "--bucket-format", dest="format", default="10.4f", help="format for bucket numbers") parser.add_option("-p", "--percentage", dest="percentage", default=False, action="store_true", help="List percentage for each bar") From 0cb0ee53333556f7b1046c72dbe12f0c2e030a13 Mon Sep 17 00:00:00 2001 From: Mira <86979912+mira-miracoli@users.noreply.github.com> Date: Tue, 16 Jan 2024 09:22:03 +0100 Subject: [PATCH 2/6] missed variable --- data_hacks/histogram.py | 1 - 1 file changed, 1 deletion(-) diff --git a/data_hacks/histogram.py b/data_hacks/histogram.py index 1dbd509..92a9edc 100755 --- a/data_hacks/histogram.py +++ b/data_hacks/histogram.py @@ -241,7 +241,6 @@ def log_steps(k, n): bucket_min = min_v bucket_max = min_v percentage = "" - format_string = for bucket in range(buckets): bucket_min = bucket_max bucket_max = boundaries[bucket] From b2e036d719c885f4abbf97a1e469a142f200a77c Mon Sep 17 00:00:00 2001 From: Mira <86979912+mira-miracoli@users.noreply.github.com> Date: Tue, 16 Jan 2024 09:25:17 +0100 Subject: [PATCH 3/6] rm linebreak --- data_hacks/histogram.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/data_hacks/histogram.py b/data_hacks/histogram.py index 92a9edc..5416001 100755 --- a/data_hacks/histogram.py +++ b/data_hacks/histogram.py @@ -249,8 +249,7 @@ def log_steps(k, n): if bucket_count: star_count = bucket_count / bucket_scale if options.percentage: - percentage = f" {(100 * Decimal(bucket_count) / - Decimal(samples)):.2f}" + percentage = f" {(100 * Decimal(bucket_count) / Decimal(samples)):.2f}" print f"{bucket_min:{options.format}} - {bucket_max:{options.format}} [{bucket_count:6d}]: {options.dot * star_count}{percentage}" From 13679bec791627ff29b3b1cb103afd18dd367b55 Mon Sep 17 00:00:00 2001 From: Mira <86979912+mira-miracoli@users.noreply.github.com> Date: Tue, 16 Jan 2024 09:27:09 +0100 Subject: [PATCH 4/6] print needs () --- data_hacks/histogram.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/data_hacks/histogram.py b/data_hacks/histogram.py index 5416001..02cf98d 100755 --- a/data_hacks/histogram.py +++ b/data_hacks/histogram.py @@ -97,7 +97,7 @@ def load_stream(input_stream, agg_value_key, agg_key_value): yield DataPoint(Decimal(clean_line), 1) except: logging.exception(f"failed {line:r}") - print >>sys.stderr, f"invalid line {line:r}" + print(f"invalid line {line:r}", sys.stderr) def median(values, key=None): @@ -250,7 +250,7 @@ def log_steps(k, n): star_count = bucket_count / bucket_scale if options.percentage: percentage = f" {(100 * Decimal(bucket_count) / Decimal(samples)):.2f}" - print f"{bucket_min:{options.format}} - {bucket_max:{options.format}} [{bucket_count:6d}]: {options.dot * star_count}{percentage}" + print(f"{bucket_min:{options.format}} - {bucket_max:{options.format}} [{bucket_count:6d}]: {options.dot * star_count}{percentage}") if __name__ == "__main__": @@ -287,7 +287,7 @@ def log_steps(k, n): if sys.stdin.isatty(): # if isatty() that means it's run without anything piped into it parser.print_usage() - print "for more help use --help" + print("for more help use --help") sys.exit(1) histogram(load_stream(sys.stdin, options.agg_value_key, options.agg_key_value), options) From 24b954fbaedc5be30c0dcda2b77ffad7a3a03b7b Mon Sep 17 00:00:00 2001 From: Mira <86979912+mira-miracoli@users.noreply.github.com> Date: Tue, 16 Jan 2024 09:29:47 +0100 Subject: [PATCH 5/6] should use proper IDE not github --- data_hacks/histogram.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/data_hacks/histogram.py b/data_hacks/histogram.py index 02cf98d..cf1c6d2 100755 --- a/data_hacks/histogram.py +++ b/data_hacks/histogram.py @@ -73,9 +73,9 @@ def test_mvsd(): for x in range(10): mvsd.add(x) - assert f"{mvsd.mean().2f}" == "4.50" - assert f"{mvsd.var().2f}" == "8.25" - assert f"{mvsd.sd().14f}" == "2.87228132326901" + assert f"{mvsd.mean():.2f}" == "4.50" + assert f"{mvsd.var():.2f}" == "8.25" + assert f"{mvsd.sd():.14f}" == "2.87228132326901" def load_stream(input_stream, agg_value_key, agg_key_value): From 66089bb4fd9db439754d83425a6455f60213896f Mon Sep 17 00:00:00 2001 From: Mira <86979912+mira-miracoli@users.noreply.github.com> Date: Tue, 16 Jan 2024 10:55:10 +0100 Subject: [PATCH 6/6] int type needed --- data_hacks/histogram.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/data_hacks/histogram.py b/data_hacks/histogram.py index cf1c6d2..75960b6 100755 --- a/data_hacks/histogram.py +++ b/data_hacks/histogram.py @@ -96,8 +96,8 @@ def load_stream(input_stream, agg_value_key, agg_key_value): else: yield DataPoint(Decimal(clean_line), 1) except: - logging.exception(f"failed {line:r}") - print(f"invalid line {line:r}", sys.stderr) + logging.exception(f"failed {line}") + print(f"invalid line {line}", sys.stderr) def median(values, key=None): @@ -111,7 +111,7 @@ def median(values, key=None): values = sorted(values, key=key) return sum(map(key, - [values[i] for i in median_indeces])) / len(median_indeces) + [values[int(i)] for i in median_indeces])) / len(median_indeces) def test_median(): @@ -250,7 +250,7 @@ def log_steps(k, n): star_count = bucket_count / bucket_scale if options.percentage: percentage = f" {(100 * Decimal(bucket_count) / Decimal(samples)):.2f}" - print(f"{bucket_min:{options.format}} - {bucket_max:{options.format}} [{bucket_count:6d}]: {options.dot * star_count}{percentage}") + print(f"{bucket_min:{options.format}} - {bucket_max:{options.format}} [{bucket_count:6d}]: {options.dot * int(star_count)}{percentage}") if __name__ == "__main__":