diff --git a/config.ini b/config.ini
index 7aa6487b1..e5d6169ff 100755
--- a/config.ini
+++ b/config.ini
@@ -53,6 +53,11 @@ naming_rule = number+'-'+title
 max_title_len = 50
 ; 刮削后图片是否命名为番号
 image_naming_with_number = 0
+; Uppercase the number: 1 | 0. Applied only when writing data; the search/scrape flow is not affected
+number_uppercase = 0
+; Custom regular expressions, separated by spaces. The first capture group is the extracted number (the whole match is used when a regex has no group). The default rules are used when no custom regex matches
+; example: ([A-Za-z]{2,4}\-\d{3}) ([A-Za-z]{2,4}00\d{3})
+number_regexs = 
 
 [update]
 update_check = 1
diff --git a/config.py b/config.py
index 897df6617..e008de12d 100644
--- a/config.py
+++ b/config.py
@@ -343,6 +343,20 @@ def image_naming_with_number(self) -> bool:
         except:
             return False
 
+    def number_uppercase(self) -> bool:
+        """Return the Name_Rule/number_uppercase switch; False when it cannot be read as a boolean."""
+        try:
+            return self.conf.getboolean("Name_Rule", "number_uppercase")
+        except:
+            return False
+
+    def number_regexs(self) -> str:
+        """Return the raw Name_Rule/number_regexs string; "" when it cannot be read."""
+        try:
+            return self.conf.get("Name_Rule", "number_regexs")
+        except:
+            return ""
+
     def update_check(self) -> bool:
         try:
             return self.conf.getboolean("update", "update_check")
@@ -473,6 +487,8 @@ def _default_config() -> configparser.ConfigParser:
     conf.set(sec4, "naming_rule", "number + '-' + title")
     conf.set(sec4, "max_title_len", "50")
     conf.set(sec4, "image_naming_with_number", "0")
+    conf.set(sec4, "number_uppercase", "0")
+    conf.set(sec4, "number_regexs", "")
 
     sec5 = "update"
     conf.add_section(sec5)
diff --git a/number_parser.py b/number_parser.py
index 6d19c5eaf..d54d9a066 100755
--- a/number_parser.py
+++ b/number_parser.py
@@ -40,6 +40,18 @@ def get_number(debug: bool, file_path: str) -> str:
     filepath = os.path.basename(file_path)
     # debug True 和 False 两块代码块合并,原因是此模块及函数只涉及字符串计算,没有IO操作,debug on时输出导致异常信息即可
     try:
+        # Try the user-defined regexes first; fall through to the default rules when none yields a number
+        for regex in config.getInstance().number_regexs().split():
+            try:
+                match = re.search(regex, filepath)
+                if match:
+                    # config.ini documents capture group 1 as the number; group-less patterns keep using the whole match
+                    number = match.group(1) if match.re.groups else match.group()
+                    if number:
+                        return number
+            except Exception as e:
+                print(f'[-]custom regex exception: {e} [{regex}]')
+
         file_number = get_number_by_dict(filepath)
         if file_number:
             return file_number
diff --git a/scraper.py b/scraper.py
index 06a64f81e..c5e0d82d8 100644
--- a/scraper.py
+++ b/scraper.py
@@ -165,6 +165,10 @@ def get_data_from_json(
         cover_small = tmpArr[0].strip('\"').strip('\'')
     # ====================处理异常字符 END================== #\/:*?"<>|
 
+    # Uppercase the number when number_uppercase is enabled in the config
+    if conf.number_uppercase():
+        json_data['number'] = number.upper()
+
     # 返回处理后的json_data
     json_data['title'] = title
     json_data['original_title'] = title