Open
Description
Problem Description
as in the title. Currently yafu is much faster than even partial factorization using flint
Proposed Solution
PoC (quite long because yafu is not particularly well-behaved)
import tempfile
import subprocess
import signal
import threading
import sys
from collections import Counter
def _yafu_factor(n: int, *,
yafu_executable: str="yafu",
yafu_args: list[str]|tuple[str, ...]|dict=(),
yafu_cwd: str=None,
timeout: Optional[float]=None,
verbose=False,
show_yafu_stdout=None)->list[int]:
"""
TESTS::
sage: p = next_prime(2^127)
sage: q = next_prime(2^128)
sage: _yafu_factor(p*next_prime(q)*q*next_prime(p), timeout=1) # partial factor using Fermat
5789...8509 * 5789...9911
sage: _yafu_factor(p*next_prime(q)*q*next_prime(p), yafu_args={"one": True}) # stop after one factor found
5789...8509 * 5789...9911
sage: p = next_prime(2^55)
sage: _yafu_factor(p*q) # around 1.5s
1152921504606847009 * 340282366920938463463374607431768211507
sage: _yafu_factor(prod(primes(2^30, 2^30+1000)))
1073741827 * 1073741831 * ... * 1073742773 * 1073742811
"""
if isinstance(yafu_args, dict):
yafu_args=[e for k, v in yafu_args.items() for e in ("-"+str(k),) + (str(v),)*(v is not True)]
if show_yafu_stdout is None:
show_yafu_stdout=verbose
if yafu_cwd is None:
yafu_cwd=tempfile.gettempdir()
proc=subprocess.Popen([yafu_executable, *yafu_args], stdin=subprocess.PIPE,
stdout=subprocess.PIPE,
cwd=yafu_cwd,
start_new_session=True, # needed for killpg() to work
)
proc.stdin.write(f"factor({n})\nquit\n".encode())
# use stdin instead of command-line args because the latter has length limit
proc.stdin.flush()
if show_yafu_stdout:
data=bytearray()
def stdout_read_thread_run(proc, data):
while True:
part=proc.stdout.read1()
if not part: break
data+=part
stdout=sys.stdout
if hasattr(stdout, "buffer"):
stdout.buffer.write(part)
stdout.buffer.flush()
else: # strange corner case (IPython sometimes replace sys.stdout with something else)
stdout.write(part.decode('u8', errors='replace'))
stdout.flush()
stdout_read_thread=threading.Thread(target=stdout_read_thread_run, args=(proc, data))
stdout_read_thread.start()
returncode = None
try:
returncode = proc.wait(timeout=None if timeout is None else float(timeout))
except subprocess.TimeoutExpired:
pass
finally:
# run this either on subprocess.TimeoutExpired or user press ctrl-C
if returncode is None:
#subprocess.run(..., timeout=...) does not stop the process
for timeout in [0.5]*5+[0.2]*10+[-1]:
#proc.kill() # subprocess may still run, which leads to proc.stdout.read() take forever
#proc.send_signal(signal.SIGINT) # same as above
os.killpg(os.getpgid(proc.pid), signal.SIGINT if timeout>0 else signal.SIGKILL)
if verbose: print("\nkill signal sent") # because yafu uses \r, we need initial \n
try:
returncode = proc.wait(timeout=abs(float(timeout)))
except subprocess.TimeoutExpired:
# not well-behaved subprocess, will try ctrl-C again
if verbose: print("\nctrl-c did not work, trying again")
else:
break
if verbose: print("\nprocess terminated")
if show_yafu_stdout:
stdout_read_thread.join()
if verbose: print(f"\nreturncode={returncode}")
if returncode in (11, -11, 128+11):
import warnings
from pathlib import Path
siqs_path=Path(yafu_cwd)/"siqs.dat"
if siqs_path.is_file():
warnings.warn(
f'yafu got segmentation fault, deleting {siqs_path} might help, '
'see https://github.com/bbuhrow/yafu/issues/61')
if show_yafu_stdout:
data=data.decode()
else:
data=proc.stdout.read().decode()
lines=data.splitlines()
# TODO also possible to parse from factor.log
# maybe can also parse from factor.json something like the following
# d = json.loads((Path(yafu_cwd)/"factor.json").read_text().splitlines()[-1])
# assert N == int(d["input-decimal"])
# f = d.get("factors-prime", []) + d.get("factors-composite", [])
f=[ZZ(match[1]) for line in lines if (match:=re.fullmatch(r"[PC]\d+ = (\d+)", line))]
if verbose and not show_yafu_stdout:
print(data)
if prod(f)!=n:
f=_attempt_to_recover_factors(n, f)
return Factorization(Counter(f).items())
def _attempt_to_recover_factors(n: ZZ, f: list):
result = [n]
for p in f:
result = [f for r in result for f in ((p, r // p) if 1 < p < r and r % p == 0 else (r,))]
assert prod(result)==n
return result
Alternatives Considered
Additional Information
No response
Is there an existing issue for this?
- I have searched the existing issues for a bug report that matches the one I want to file, without success.