Skip to content

Commit

Permalink
Merge pull request #2245 from Gloriaky/master
Browse files Browse the repository at this point in the history
add latin dataset
  • Loading branch information
alanakbik authored Apr 21, 2021
2 parents 263cd7a + a05173b commit 6926cc0
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 0 deletions.
1 change: 1 addition & 0 deletions flair/datasets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,3 +252,4 @@
from .biomedical import BIOBERT_SPECIES_S800
from .biomedical import BIOBERT_GENE_BC2GM
from .biomedical import BIOBERT_GENE_JNLPBA
from.treebanks import UD_LATIN
28 changes: 28 additions & 0 deletions flair/datasets/treebanks.py
Original file line number Diff line number Diff line change
Expand Up @@ -567,6 +567,34 @@ def __init__(self, base_path: Union[str, Path] = None, in_memory: bool = True, s
super(UD_ITALIAN, self).__init__(data_folder, in_memory=in_memory, split_multiwords=split_multiwords)


class UD_LATIN(UniversalDependenciesCorpus):
    """Universal Dependencies corpus built from the UD_Latin-LLCT treebank files."""

    def __init__(self, base_path: Union[str, Path] = None, in_memory: bool = True, split_multiwords: bool = True):
        """Fetch the dev/test/train ``.conllu`` files and initialise the corpus.

        :param base_path: Folder under which the dataset folder is expected.
            A ``str`` is converted to ``Path``; when not given, the folder
            ``flair.cache_root / "datasets"`` is used.
        :param in_memory: Forwarded unchanged to ``UniversalDependenciesCorpus``.
        :param split_multiwords: Forwarded unchanged to ``UniversalDependenciesCorpus``.
        """
        if isinstance(base_path, str):
            base_path = Path(base_path)

        # this dataset name
        dataset_name = self.__class__.__name__.lower()

        # default dataset folder is the cache root
        if not base_path:
            base_path = Path(flair.cache_root) / "datasets"
        data_folder = base_path / dataset_name

        # download data if necessary
        # Raw file contents are served from raw.githubusercontent.com; the
        # github.com/<org>/<repo>/master/<file> form is not a file endpoint.
        # No trailing slash here, because the f-string below adds the separator.
        web_path = "https://raw.githubusercontent.com/UniversalDependencies/UD_Latin-LLCT/master"
        # NOTE(review): the download target is given relative to "datasets",
        # independent of ``base_path`` - presumably cached_path resolves it
        # against the flair cache root; confirm against cached_path.
        for split in ("dev", "test", "train"):
            cached_path(f"{web_path}/la_llct-ud-{split}.conllu", Path("datasets") / dataset_name)

        super(UD_LATIN, self).__init__(data_folder, in_memory=in_memory, split_multiwords=split_multiwords)



class UD_SPANISH(UniversalDependenciesCorpus):
def __init__(self, base_path: Union[str, Path] = None, in_memory: bool = True, split_multiwords: bool = True):

Expand Down

0 comments on commit 6926cc0

Please sign in to comment.