Skip to content

Commit

Permalink
Merge pull request #2223 from IrynaRepinetska-dev/feature/UD_Belarusian
Browse files Browse the repository at this point in the history
Add support for UD_BELARUSIAN-HSE dataset (POS)
  • Loading branch information
alanakbik authored Apr 19, 2021
2 parents 217e3e6 + 9dac167 commit 77e6ad6
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 2 deletions.
1 change: 1 addition & 0 deletions flair/datasets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@
from .treebanks import UD_OLD_FRENCH
from .treebanks import UD_GOTHIC
from .treebanks import UD_WOLOF
from .treebanks import UD_BELARUSIAN
from .treebanks import UD_OLD_CHURCH_SLAVONIC
from .treebanks import UD_COPTIC

Expand Down
30 changes: 28 additions & 2 deletions flair/datasets/treebanks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1510,6 +1510,33 @@ def __init__(self, base_path: Union[str, Path] = None, in_memory: bool = True, s
super(UD_WOLOF, self).__init__(data_folder, in_memory=in_memory, split_multiwords=split_multiwords)


class UD_BELARUSIAN(UniversalDependenciesCorpus):
    """Universal Dependencies corpus built from the UD_Belarusian-HSE treebank."""

    def __init__(self, base_path: Union[str, Path] = None, in_memory: bool = True, split_multiwords: bool = True):
        """Fetch the treebank splits if necessary and initialise the corpus.

        :param base_path: Folder that contains (or will contain) the dataset
            folder. A string is converted to a ``Path``. When falsy,
            ``flair.cache_root / "datasets"`` is used.
        :param in_memory: Passed through unchanged to the parent constructor.
        :param split_multiwords: Passed through unchanged to the parent constructor.
        """
        # isinstance instead of an exact type comparison, so str subclasses
        # are converted to Path as well
        if isinstance(base_path, str):
            base_path = Path(base_path)

        # this dataset name
        dataset_name = self.__class__.__name__.lower()

        # default dataset folder is the cache root
        if not base_path:
            base_path = Path(flair.cache_root) / "datasets"
        data_folder = base_path / dataset_name

        # download data if necessary
        # The files have to be requested from the raw-content host: a
        # "github.com/<org>/<repo>/master/<file>" URL does not serve the file.
        web_path = "https://raw.githubusercontent.com/UniversalDependencies/UD_Belarusian-HSE/master"

        # NOTE(review): the download target is always "datasets/<dataset_name>",
        # independent of a caller-supplied base_path -- presumably cached_path
        # resolves it against the flair cache root; confirm against cached_path.
        for split in ("dev", "test", "train"):
            cached_path(f"{web_path}/be_hse-ud-{split}.conllu", Path("datasets") / dataset_name)

        super(UD_BELARUSIAN, self).__init__(data_folder, in_memory=in_memory, split_multiwords=split_multiwords)


class UD_COPTIC(UniversalDependenciesCorpus):
def __init__(self, base_path: Union[str, Path] = None, in_memory: bool = True, split_multiwords: bool = True):

Expand All @@ -1534,5 +1561,4 @@ def __init__(self, base_path: Union[str, Path] = None, in_memory: bool = True, s
f"{web_path}/cop_scriptorium-ud-train.conllu", Path("datasets") / dataset_name
)

super(UD_COPTIC, self).__init__(data_folder, in_memory=in_memory, split_multiwords=split_multiwords)

super(UD_COPTIC, self).__init__(data_folder, in_memory=in_memory, split_multiwords=split_multiwords)

0 comments on commit 77e6ad6

Please sign in to comment.