Skip to content

add algorithm="yafu" to factor() #40333

Open
@user202729

Description

@user202729

Problem Description

as in the title. Currently yafu is much faster than even partial factorization using flint

#40317

Proposed Solution

PoC (quite long because yafu is not particularly well-behaved)

import tempfile
import subprocess
import signal
import threading
import sys
from collections import Counter
def _yafu_factor(n: int, *,
				 yafu_executable: str="yafu",
				 yafu_args: list[str]|tuple[str, ...]|dict=(),
				 yafu_cwd: str=None,
				 timeout: Optional[float]=None,
				 verbose=False,
				 show_yafu_stdout=None)->list[int]:
	"""
	TESTS::
		sage: p = next_prime(2^127)
		sage: q = next_prime(2^128)
		sage: _yafu_factor(p*next_prime(q)*q*next_prime(p), timeout=1)  # partial factor using Fermat
		5789...8509 * 5789...9911
		sage: _yafu_factor(p*next_prime(q)*q*next_prime(p), yafu_args={"one": True})  # stop after one factor found
		5789...8509 * 5789...9911
		sage: p = next_prime(2^55)
		sage: _yafu_factor(p*q)  # around 1.5s
		1152921504606847009 * 340282366920938463463374607431768211507
		sage: _yafu_factor(prod(primes(2^30, 2^30+1000)))
		1073741827 * 1073741831 * ... * 1073742773 * 1073742811
	"""
	if isinstance(yafu_args, dict):
		yafu_args=[e for k, v in yafu_args.items() for e in ("-"+str(k),) + (str(v),)*(v is not True)]
	if show_yafu_stdout is None:
		show_yafu_stdout=verbose
	if yafu_cwd is None:
		yafu_cwd=tempfile.gettempdir()
	proc=subprocess.Popen([yafu_executable, *yafu_args], stdin=subprocess.PIPE,
					   stdout=subprocess.PIPE,
					   cwd=yafu_cwd,
					   start_new_session=True,  # needed for killpg() to work
					   )
	proc.stdin.write(f"factor({n})\nquit\n".encode())
	# use stdin instead of command-line args because the latter has length limit
	proc.stdin.flush()
	if show_yafu_stdout:
		data=bytearray()
		def stdout_read_thread_run(proc, data):
			while True:
				part=proc.stdout.read1()
				if not part: break
				data+=part
				stdout=sys.stdout
				if hasattr(stdout, "buffer"):
					stdout.buffer.write(part)
					stdout.buffer.flush()
				else:  # strange corner case (IPython sometimes replace sys.stdout with something else)
					stdout.write(part.decode('u8', errors='replace'))
					stdout.flush()
		stdout_read_thread=threading.Thread(target=stdout_read_thread_run, args=(proc, data))
		stdout_read_thread.start()
	returncode = None
	try:
		returncode = proc.wait(timeout=None if timeout is None else float(timeout))
	except subprocess.TimeoutExpired:
		pass
	finally:
		# run this either on subprocess.TimeoutExpired or user press ctrl-C
		if returncode is None:
			#subprocess.run(..., timeout=...) does not stop the process
			for timeout in [0.5]*5+[0.2]*10+[-1]:
				#proc.kill()  # subprocess may still run, which leads to proc.stdout.read() take forever
				#proc.send_signal(signal.SIGINT)  # same as above
				os.killpg(os.getpgid(proc.pid), signal.SIGINT if timeout>0 else signal.SIGKILL)
				if verbose: print("\nkill signal sent")  # because yafu uses \r, we need initial \n
				try:
					returncode = proc.wait(timeout=abs(float(timeout)))
				except subprocess.TimeoutExpired:
					# not well-behaved subprocess, will try ctrl-C again
					if verbose: print("\nctrl-c did not work, trying again")
				else:
					break
			if verbose: print("\nprocess terminated")
		if show_yafu_stdout:
			stdout_read_thread.join()
	if verbose: print(f"\nreturncode={returncode}")
	if returncode in (11, -11, 128+11):
		import warnings
		from pathlib import Path
		siqs_path=Path(yafu_cwd)/"siqs.dat"
		if siqs_path.is_file():
			warnings.warn(
					f'yafu got segmentation fault, deleting {siqs_path} might help, '
					'see https://github.com/bbuhrow/yafu/issues/61')
	if show_yafu_stdout:
		data=data.decode()
	else:
		data=proc.stdout.read().decode()
	lines=data.splitlines()
	# TODO also possible to parse from factor.log
	# maybe can also parse from factor.json something like the following
	# d = json.loads((Path(yafu_cwd)/"factor.json").read_text().splitlines()[-1])
	# assert N == int(d["input-decimal"])
	# f = d.get("factors-prime", []) + d.get("factors-composite", [])
	f=[ZZ(match[1]) for line in lines if (match:=re.fullmatch(r"[PC]\d+ = (\d+)", line))]
	if verbose and not show_yafu_stdout:
		print(data)
	if prod(f)!=n:
		f=_attempt_to_recover_factors(n, f)
	return Factorization(Counter(f).items())
def _attempt_to_recover_factors(n: ZZ, f: list):
	result = [n]
	for p in f:
		result = [f for r in result for f in ((p, r // p) if 1 < p < r and r % p == 0 else (r,))]
	assert prod(result)==n
	return result

Alternatives Considered

Additional Information

No response

Is there an existing issue for this?

  • I have searched the existing issues for a bug report that matches the one I want to file, without success.

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions