# chatgpt_submission_public.py
import os
import openai
import time
from bs4 import BeautifulSoup
import pprint
import tiktoken
# Credentials and output-path configuration for the script.
#
# The API key is taken from the OPENAI_API_KEY environment variable so it does
# not have to be pasted into (and accidentally committed with) the source.
# When the variable is unset this falls back to the original empty placeholder.
openai.api_key = os.environ.get("OPENAI_API_KEY", "")

# Path of a leftover output file to delete before the run starts. With the
# default "" nothing is deleted, because os.path.isfile("") is False.
# NOTE(review): rewrite_path is not referenced again in the visible part of
# this script -- confirm whether it is still needed.
rewrite_path = ""

# Redundant re-import (os is already imported at the top of the file); kept
# so no existing import line is removed.
import os

# Clear the stale output file, if one exists.
if os.path.isfile(rewrite_path):
    os.remove(rewrite_path)
def num_tokens_from_string(string: str, encoding_name: str) -> int:
    """Return how many tokens `string` occupies under a tiktoken encoding.

    Args:
        string: Text to tokenize.
        encoding_name: Encoding name handed to ``tiktoken.get_encoding``.

    Returns:
        The length of the token sequence produced by encoding `string`.
    """
    tokenizer = tiktoken.get_encoding(encoding_name)
    return len(tokenizer.encode(string))
def chatgpt_completion(
    model_new="gpt-3.5-turbo",
    prompt_new="hi",
    temperature_new=0.05,
    top_p_new=1,
    n_new=1,
    max_tokens_new=100,
):
    """Send a single user message through ``openai.ChatCompletion.create``.

    Args:
        model_new: Model name forwarded as ``model``.
        prompt_new: Text sent as the content of the one ``user`` message.
        temperature_new: Forwarded as ``temperature``.
        top_p_new: Forwarded as ``top_p``.
        n_new: Forwarded as ``n``.
        max_tokens_new: Forwarded as ``max_tokens``.

    Returns:
        The object returned by ``openai.ChatCompletion.create``, unmodified.
    """
    user_message = {"role": "user", "content": prompt_new}
    request = {
        "model": model_new,
        "messages": [user_message],
        "temperature": temperature_new,
        "top_p": top_p_new,
        "n": n_new,
        "max_tokens": max_tokens_new,
        # Both penalties are always sent as 0; they are not configurable here.
        "presence_penalty": 0,
        "frequency_penalty": 0,
    }
    return openai.ChatCompletion.create(**request)
# Load every regular file in `directory`, parse it as XML and collect the
# first child of its <TEXT> element.
#
# Folder that holds the input documents; must be filled in before running
# (os.listdir('') raises FileNotFoundError).
directory = ''
# Parallel lists: list_of_files[k] is the extension-less name of the file
# whose <TEXT> content is stored in list_of_text_contents[k].
list_of_text_contents = []
list_of_files = []
# Sorted so the processing order is the same on every run; os.listdir makes
# no ordering promise.
for filename in sorted(os.listdir(directory)):
    path = os.path.join(directory, filename)
    if not os.path.isfile(path):
        continue
    # splitext removes whatever extension is present, instead of blindly
    # cutting the last four characters off the name.
    stem = os.path.splitext(filename)[0]
    print(stem)
    # Binary mode hands raw bytes to the parser so it can detect the
    # document's encoding itself rather than relying on the locale default.
    with open(path, "rb") as fp:
        soup = BeautifulSoup(fp, features="xml")
    text = soup.find('TEXT')
    if text is None or not text.contents:
        # Not one of the expected documents (or an empty one): skip it rather
        # than abort the whole run with AttributeError / IndexError.
        print("Skipping " + filename + ": no <TEXT> content found")
        continue
    # Append to both lists together so their indices always stay aligned.
    list_of_files.append(stem)
    list_of_text_contents.append(text.contents[0])
# Send each loaded document to the model, save the reply to
# "<name>_anonymized.txt" in the current working directory, and echo both the
# prompt and the reply to stdout.
for i, (file_stem, text_content) in enumerate(zip(list_of_files, list_of_text_contents)):
    # NOTE(review): the leading "" is presumably a placeholder for the
    # instruction text that should precede the document -- confirm.
    prompt = "" + text_content
    # chatgpt_completion defaults to gpt-3.5-turbo, which is tokenized with
    # cl100k_base; counting with the gpt2 encoding reports the wrong size.
    num_tokens = num_tokens_from_string(prompt, "cl100k_base")
    print(num_tokens)
    completion = chatgpt_completion(prompt_new=prompt, max_tokens_new=4000)
    rewrite_finding = completion.choices[0].message.content
    rewrite_file = file_stem + "_anonymized.txt"
    # Explicit UTF-8 so replies containing non-ASCII characters are written
    # correctly regardless of the platform's default encoding.
    with open(rewrite_file, "w", encoding="utf-8") as f:
        f.write(rewrite_finding)
    print("-----------The" + str(i + 1) + "个\n-----------")
    print("-----------My prompt " + "\n-----------")
    print(prompt)
    print("-----------Anonymized " + "\n-----------")
    print(rewrite_finding)
    # Fixed pause between requests.
    time.sleep(10)