From d471052f3ca556981e9970a38f40c159b7b402bf Mon Sep 17 00:00:00 2001
From: Taehyuny <31824443+Taehyuny@users.noreply.github.com>
Date: Sat, 13 Nov 2021 13:33:38 +0900
Subject: [PATCH 01/30] Create TUTORIAL_1_BASICS.md

---
 resources/docs/KOR_docs/TUTORIAL_1_BASICS.md | 268 +++++++++++++++++++
 1 file changed, 268 insertions(+)
 create mode 100644 resources/docs/KOR_docs/TUTORIAL_1_BASICS.md

diff --git a/resources/docs/KOR_docs/TUTORIAL_1_BASICS.md b/resources/docs/KOR_docs/TUTORIAL_1_BASICS.md
new file mode 100644
index 000000000..da07b085d
--- /dev/null
+++ b/resources/docs/KOR_docs/TUTORIAL_1_BASICS.md
@@ -0,0 +1,268 @@
+# 튜토리얼 1: NLP 기본 타입들
+
+튜토리얼 1부에서는 이 라이브러리에서 사용되는 몇 가지 기본 유형을 살펴볼 것 입니다.
+
+## 문장 생성
+
+flair 라이브러리의 중심에는 'Sentence'과 'Token'이라는 두 가지 유형의 오브젝트가 존재합니다.
+문장(Sentence)은 텍스트 문장을 담고 있으며 본질적으로 토큰(Token)의 목록입니다.
+
+먼저 예문 'Sentence' 를 만드는 것으로 시작하겠습니다.
+```python
+# 문장 객체는 특정 태그를 지정할 수 있는 문장을 포함합니다.
+from flair.data import Sentence
+
+# 문장 객체를 만드는 모습
+sentence = Sentence('The grass is green.')
+
+# 문자열이 포함된 Sentence 객체 출력
+print(sentence)
+```
+
+출력 결과:
+
+```console
+Sentence: "The grass is green ."   [− Tokens: 5]
+```
+
+출력문에서 문장이 5개의 토큰으로 구성되어 있음을 알 수 있습니다.
+다음과 같이 토큰의 ID 나 인덱스를 통해 문장의 토큰에 액세스할 수도 있습니다.
+
+```python
+# token id
+print(sentence.get_token(4))
+# index itself
+print(sentence[3])
+```
+
+두 출력문은 아래와 같은 결과가 나오게 됩니다.
+
+```console
+Token: 4 green
+```
+
+위 출력문에는 토큰 ID(4)와 토큰의 어휘 값("green")이 포함됩니다. 또한 문장의 모든 토큰에 대해 반복하여 출력이 가능합니다.
+
+```python
+for token in sentence:
+    print(token)
+```
+
+출력 결과:
+
+```console
+Token: 1 The
+Token: 2 grass
+Token: 3 is
+Token: 4 green
+Token: 5 .
+```
+
+## 토큰화
+
+위와 같이 'Sentence'를 생성하면 텍스트는 [세그톡 라이브러리](https://pypi.org/project/segtok/)에 의해 자동 토큰화됩니다.
+
+### 토큰화를 사용 안하기
+
+이 토크나이저를 사용하지 않으려면 `use_tokenizer` 플래그를 `False`로 설정하십시오.
+
+```python
+from flair.data import Sentence
+
+# 'use_tokenizer' flag를 false로 설정하여 토큰화하지 않는 모습
+untokenized_sentence = Sentence('The grass is green.', use_tokenizer=False)
+
+# 출력
+print(untokenized_sentence)
+```
+
+이 경우 토큰화가 수행되지 않고 텍스트가 공백으로 분할되므로 토큰이 4개만 생성됩니다.
+
+### 다른 토크나이저를 사용하는 경우
+
+사용자 지정 토크나이저를 초기화 메서드에 전달할 수도 있습니다.
+예를 들어 일본어 문장을 토큰화하려는 경우, 대신 다음과 같이 'janome' 토크나이저를 사용할 수 있습니다.
+
+```python
+from flair.data import Sentence
+from flair.tokenization import JapaneseTokenizer
+
+# 일본어 토크나이저 초기화
+tokenizer = JapaneseTokenizer("janome")
+
+# 문장 생성
+japanese_sentence = Sentence("私はベルリンが好き", use_tokenizer=tokenizer)
+
+# 문장 출력
+print(japanese_sentence)
+```
+
+출력 결과:
+
+```console
+Sentence: "私 は ベルリン が 好き"   [− Tokens: 5]
+```
+
+토큰화 루틴을 직접 작성할 수도 있습니다.
+
+### 사전 토큰화된 시퀀스 사용
+사전 토큰화된 시퀀스를 단어 목록으로 전달할 수 있습니다.
+
+```python
+from flair.data import Sentence
+sentence = Sentence(['The', 'grass', 'is', 'green', '.'])
+print(sentence)
+```
+
+출력 결과:
+
+```console
+Sentence: "The grass is green ."   [− Tokens: 5]
+```
+
+
+## 라벨 추가
+
+### 토큰에 라벨 추가
+
+Flair에서는 모든 데이터 점에 레이블을 지정할 수 있습니다. 예를 들어 단어에 레이블을 지정하거나 문장에 레이블을 지정할 수 있습니다.
+
+```python
+# 문장 속 단어에 대한 태그 추가
+sentence[3].add_tag('ner', 'color')
+
+# 문장의 모든 태그 출력
+print(sentence.to_tagged_string())
+```
+
+출력 결과:
+
+```console
+The grass is green <color> .
+```
+
+각 태그는 `Label` 클래스의 객체이며, 값(value)과 함께 신뢰도를 나타내는 score를 가집니다.
+
+```python
+# 3번째 인덱스의 토큰 가져오기
+token = sentence[3]
+
+# 토큰의 ner 태그를 가져오기
+tag = token.get_tag('ner')
+
+# 토큰 출력
+print(f'"{token}" is tagged as "{tag.value}" with confidence score "{tag.score}"')
+```
+
+출력 결과:
+
+```console
+"Token: 4 green" is tagged as "color" with confidence score "1.0"
+```
+
+방금의 color 태그는 수동으로 추가했기 때문에 점수가 1.0입니다. 태그가 시퀀스 레이블러에 의해 예측된 경우,
+점수 값은 분류기의 신뢰도를 나타냅니다.
+
+### 문장에 라벨 추가
+
+전체 문장에 라벨도 추가할 수 있습니다.
+예를 들어, 아래 예제는 문장에 '스포츠'라는 레이블을 추가하는 방법을 보여줍니다.
+
+
+```python
+sentence = Sentence('France is the current world cup winner.')
+
+# add a label to a sentence
+sentence.add_label('topic', 'sports')
+
+print(sentence)
+
+# Alternatively, you can also create a sentence with label in one line
+sentence = Sentence('France is the current world cup winner.').add_label('topic', 'sports')
+
+print(sentence)
+```
+
+출력 결과: 
+
+```console
+Sentence: "France is the current world cup winner."   [− Tokens: 7  − Sentence-Labels: {'topic': [sports (1.0)]}]
+```
+
+위 문장은 완벽하게 '스포츠' 항목에 속함을 나타냅니다.
+
+### 다중 레이블
+
+모든 데이터에 대해 여러 번 레이블을 지정할 수 있습니다. 예를 들어 문장은 두 가지 주제에 속할 수 있습니다. 이 경우 레이블 이름이 같은 레이블 두 개를 추가합니다.
+
+```python
+sentence = Sentence('France is the current world cup winner.')
+
+# this sentence has multiple topic labels
+sentence.add_label('topic', 'sports')
+sentence.add_label('topic', 'soccer')
+```
+
+동일한 문장에 대해 다른 주석 계층을 추가할 수 있습니다. 주제 외에 문장의 "언어"도 예측할 수 있습니다. 이 경우 다른 레이블 이름을 가진 레이블을 추가합니다.
+
+```python
+sentence = Sentence('France is the current world cup winner.')
+
+# this sentence has multiple "topic" labels
+sentence.add_label('topic', 'sports')
+sentence.add_label('topic', 'soccer')
+
+# this sentence has a "language" label
+sentence.add_label('language', 'English')
+
+print(sentence)
+```
+
+출력 결과: 
+
+```console
+Sentence: "France is the current world cup winner."   [− Tokens: 7  − Sentence-Labels: {'topic': [sports (1.0), soccer (1.0)], 'language': [English (1.0)]}]
+```
+
+이 문장에 두 개의 "주제" 라벨과 하나의 "언어" 라벨이 있음을 나타냅니다.
+
+### 문장의 레이블에 액세스
+
+다음과 같이 레이블에 액세스할 수 있습니다.
+
+```python
+for label in sentence.labels:
+    print(label)
+```
+
+각 라벨은 'Label' 개체이므로 라벨의 `value` 및 `score` 필드에 직접 액세스할 수도 있습니다.
+
+```python
+print(sentence.to_plain_string())
+for label in sentence.labels:
+    print(f' - classified as "{label.value}" with score {label.score}')
+```
+
+출력 결과:
+
+```console
+France is the current world cup winner.
+ - classified as "sports" with score 1.0
+ - classified as "soccer" with score 1.0
+ - classified as "English" with score 1.0
+```
+
+한 레이어의 레이블에만 관심이 있는 경우 다음과 같이 액세스할 수 있습니다.
+
+```python
+for label in sentence.get_labels('topic'):
+    print(label)
+```
+
+위의 예제는 topic 라벨만 제공합니다 
+
+## 다음 튜토리얼
+
+지금까지 문장을 만들고 수동으로 라벨을 붙이는 방법에 대해 알아보았습니다.
+
+이제 [사전 훈련된 모델](/resources/docs/TUTORIAL_2_TAGGING.md)을 사용하여 텍스트에 태그를 지정하는 방법에 대해 알아보겠습니다.

From f9949bb56024ff327b06c162dffa6e5ad3b5846b Mon Sep 17 00:00:00 2001
From: Taehyuny <31824443+Taehyuny@users.noreply.github.com>
Date: Sat, 13 Nov 2021 13:34:03 +0900
Subject: [PATCH 02/30] Create TUTORIAL_2_TAGGING.md

---
 resources/docs/KOR_docs/TUTORIAL_2_TAGGING.md | 365 ++++++++++++++++++
 1 file changed, 365 insertions(+)
 create mode 100644 resources/docs/KOR_docs/TUTORIAL_2_TAGGING.md

diff --git a/resources/docs/KOR_docs/TUTORIAL_2_TAGGING.md b/resources/docs/KOR_docs/TUTORIAL_2_TAGGING.md
new file mode 100644
index 000000000..44d9274a3
--- /dev/null
+++ b/resources/docs/KOR_docs/TUTORIAL_2_TAGGING.md
@@ -0,0 +1,365 @@
+# 튜토리얼 2: 텍스트 태깅
+
+튜토리얼 2부는 1부를 어느정도 학습하였다고 가정하고 진행하겠습니다. 
+여기서는 사전 훈련된 모델을 사용하여 텍스트에 태그를 지정합니다.
+
+## 사전 훈련된 모델을 사용하여 태깅
+
+개체명 인식(NER)에 대해 사전 훈련된 모델을 사용하겠습니다.
+이 모델은 영어 CoNLL-03 과제를 통해 교육되었으며 4개의 다른 실체를 인식할 수 있습니다.
+
+```python
+from flair.models import SequenceTagger
+
+tagger = SequenceTagger.load('ner')
+```
+
+문장에서 tagger의 메소드인 predict()를 사용할 수 있습니다.
+예측 태그를 토큰에 추가합니다. 여기서는 문장에 두 개의 명명된 엔터티가 있는 문장을 사용하겠습니다.
+
+```python
+sentence = Sentence('George Washington went to Washington.')
+
+# predict NER tags
+tagger.predict(sentence)
+
+# print sentence with predicted tags
+print(sentence.to_tagged_string())
+```
+
+출력 결과: 
+```console
+George <B-PER> Washington <E-PER> went to Washington <S-LOC> . 
+```
+
+### 주석이 달린 범위 받기
+
+많은 시퀀스 라벨링 방법은 여러 단어로 구성된 범위에 주석을 달게 됩니다. (예 : "조지 워싱턴")
+다음과 같이 태그가 지정된 문장에서 이러한 범위를 직접 얻을 수 있습니다.
+
+```python
+for entity in sentence.get_spans('ner'):
+    print(entity)
+```
+
+출력 결과:
+```console
+Span [1,2]: "George Washington"   [− Labels: PER (0.9968)]
+Span [5]: "Washington"   [− Labels: LOC (0.9994)]
+```
+
+이것은 "조지 워싱턴"이 사람(PER)이고 "워싱턴"이 위치(LOC)임을 나타냅니다.
+각 `Span`에는 텍스트, 문장 내 위치, 그리고 값과 점수(예측에 대한 신뢰도)를 가진 라벨이 있습니다.
+또한 다음을 호출하여 문장 내 각 엔터티의 위치 오프셋과 같은
+추가 정보를 얻을 수 있습니다.
+
+```python
+print(sentence.to_dict(tag_type='ner'))
+```
+
+출력 결과:
+```console
+{'text': 'George Washington went to Washington.',
+    'entities': [
+        {'text': 'George Washington', 'start_pos': 0, 'end_pos': 17, 'type': 'PER', 'confidence': 0.999},
+        {'text': 'Washington', 'start_pos': 26, 'end_pos': 36, 'type': 'LOC', 'confidence': 0.998}
+    ]}
+```
+
+
+### 멀티 태깅
+
+예를 들어 NER 및 POS(Part-of-Speech) 태그와 같은 여러 유형의 주석을 한 번에 예측하려는 경우도 있습니다.
+이를 위해 다음과 같이 새로운 멀티태거 개체를 사용할 수 있습니다.
+
+```python
+from flair.models import MultiTagger
+
+# load tagger for POS and NER 
+tagger = MultiTagger.load(['pos', 'ner'])
+
+# make example sentence
+sentence = Sentence("George Washington went to Washington.")
+
+# predict with both models
+tagger.predict(sentence)
+
+print(sentence)
+``` 
+
+이 문장에는 두 가지 유형의 주석이 있습니다. POS와 NER입니다.
+
+### 사전 훈련된 시퀀스 태거 모델 목록
+
+`SequenceTagger` 클래스의 `load()` 메서드에 적절한 문자열을 전달하여
+로드할 사전 훈련 모델을 선택합니다.
+
+현재 제공되는 모델과 커뮤니티가 기여한 모델의 전체 목록은 [__model hub__](https://huggingface.co/models?library=flair&sort=downloads)에서 찾아볼 수 있습니다.
+최소한 다음과 같은 사전 훈련 모델이 제공됩니다(모델에 대한 자세한 정보와
+온라인 데모를 보려면 ID 링크를 클릭하세요):
+
+#### 영어 모델들
+
+| ID | 태스크 | 언어 | 훈련 데이터셋 | 정확도 | 참고사항 |
+| -------------    | ------------- |------------- |------------- | ------------- | ------------- |
+| '[ner](https://huggingface.co/flair/ner-english)' | NER (4-class) |  English | Conll-03  |  **93.03** (F1) |
+| '[ner-fast](https://huggingface.co/flair/ner-english-fast)' | NER (4-class)  |  English  |  Conll-03  |  **92.75** (F1) | (fast model)
+| '[ner-large](https://huggingface.co/flair/ner-english-large)' | NER (4-class)  |  English  |  Conll-03  |  **94.09** (F1) | (large model)
+| 'ner-pooled' | NER (4-class)  |  English |  Conll-03  |  **93.24** (F1) | (memory inefficient)
+| '[ner-ontonotes](https://huggingface.co/flair/ner-english-ontonotes)' | NER (18-class) |  English | Ontonotes  |  **89.06** (F1) |
+| '[ner-ontonotes-fast](https://huggingface.co/flair/ner-english-ontonotes-fast)' | NER (18-class) |  English | Ontonotes  |  **89.27** (F1) | (fast model)
+| '[ner-ontonotes-large](https://huggingface.co/flair/ner-english-ontonotes-large)' | NER (18-class) |  English | Ontonotes  |  **90.93** (F1) | (large model)
+| '[chunk](https://huggingface.co/flair/chunk-english)' |  Chunking   |  English | Conll-2000     |  **96.47** (F1) |
+| '[chunk-fast](https://huggingface.co/flair/chunk-english-fast)' |   Chunking   |  English | Conll-2000     |  **96.22** (F1) |(fast model)
+| '[pos](https://huggingface.co/flair/pos-english)' |  POS-tagging |   English |  Ontonotes     |**98.19** (Accuracy) |
+| '[pos-fast](https://huggingface.co/flair/pos-english-fast)' |  POS-tagging |   English |  Ontonotes     |  **98.1** (Accuracy) |(fast model)
+| '[upos](https://huggingface.co/flair/upos-english)' |  POS-tagging (universal) | English | Ontonotes     |  **98.6** (Accuracy) |
+| '[upos-fast](https://huggingface.co/flair/upos-english-fast)' |  POS-tagging (universal) | English | Ontonotes     |  **98.47** (Accuracy) | (fast model)
+| '[frame](https://huggingface.co/flair/frame-english)'  |   Frame Detection |  English | Propbank 3.0     |  **97.54** (F1) |
+| '[frame-fast](https://huggingface.co/flair/frame-english-fast)'  |  Frame Detection |  English | Propbank 3.0     |  **97.31** (F1) | (fast model)
+| 'negation-speculation'  | Negation / speculation |English |  Bioscope | **80.2** (F1) |
+
+### 다국어 모델
+
+단일 모델 내에서 여러 언어로 텍스트를 처리할 수 있는 새로운 모델을 배포합니다.
+
+NER 모델은 4개 언어(영어, 독일어, 네덜란드어 및 스페인어)로, PoS 모델은 12개 언어(영어, 독일어, 프랑스어, 이탈리아어, 네덜란드어, 폴란드어, 스페인어, 스웨덴어, 덴마크어, 노르웨이어, 핀란드어 및 체코어)로 훈련되었습니다.
+
+| ID | 태스크 | 언어 | 훈련 데이터셋 | 정확도 | 참고사항 |
+| -------------    | ------------- |------------- |------------- | ------------- | ------------- |
+| '[ner-multi](https://huggingface.co/flair/ner-multi)' | NER (4-class) | Multilingual | Conll-03   |  **89.27**  (average F1) | (4 languages)
+| '[ner-multi-fast](https://huggingface.co/flair/ner-multi-fast)' | NER (4-class)|  Multilingual |  Conll-03   |  **87.91**  (average F1) | (4 languages)
+| '[pos-multi](https://huggingface.co/flair/upos-multi)' |  POS-tagging   |  Multilingual |  UD Treebanks  |  **96.41** (average acc.) |  (12 languages)
+| '[pos-multi-fast](https://huggingface.co/flair/upos-multi-fast)' |  POS-tagging |  Multilingual |  UD Treebanks  |  **92.88** (average acc.) | (12 languages) 
+
+이러한 언어로 된 텍스트를 모델에 전달할 수 있습니다. 특히, NER는 프랑스어와 같이 훈련되지 않은 언어에도 적용되었습니다.
+
+#### 다른 언어들을 위한 모델들
+
+| ID | 태스크 | 언어 | 훈련 데이터셋 | 정확도 | 참고사항 |
+| -------------    | ------------- |------------- |------------- |------------- | ------------ |
+| '[ar-ner](https://huggingface.co/megantosh/flair-arabic-multi-ner)' | NER (4-class) | Arabic | AQMAR & ANERcorp (curated) |  **86.66** (F1) | |
+| '[ar-pos](https://huggingface.co/megantosh/flair-arabic-dialects-codeswitch-egy-lev)' | POS-tagging | Arabic (+dialects)| combination of corpora |  | |
+| '[de-ner](https://huggingface.co/flair/ner-german)' | NER (4-class) |  German | Conll-03  |  **87.94** (F1) | |
+| '[de-ner-large](https://huggingface.co/flair/ner-german-large)' | NER (4-class) |  German | Conll-03  |  **92.31** (F1) | |
+| 'de-ner-germeval' | NER (4-class) | German | Germeval  |  **84.90** (F1) | |
+| '[de-ner-legal](https://huggingface.co/flair/ner-german-legal)' | NER (legal text) |  German | [LER](https://github.com/elenanereiss/Legal-Entity-Recognition) dataset  |  **96.35** (F1) | |
+| 'de-pos' | POS-tagging | German | UD German - HDT  |  **98.50** (Accuracy) | |
+| 'de-pos-tweets' | POS-tagging | German | German Tweets  |  **93.06** (Accuracy) | [stefan-it](https://github.com/stefan-it/flair-experiments/tree/master/pos-twitter-german) |
+| 'de-historic-indirect' | historical indirect speech | German | @redewiedergabe project |  **87.94** (F1) | [redewiedergabe](https://github.com/redewiedergabe/tagger) | |
+| 'de-historic-direct' | historical direct speech |  German | @redewiedergabe project |  **87.94** (F1) | [redewiedergabe](https://github.com/redewiedergabe/tagger) | |
+| 'de-historic-reported' | historical reported speech | German |  @redewiedergabe project |  **87.94** (F1) | [redewiedergabe](https://github.com/redewiedergabe/tagger) | |
+| 'de-historic-free-indirect' | historical free-indirect speech | German | @redewiedergabe project |  **87.94** (F1) | [redewiedergabe](https://github.com/redewiedergabe/tagger) | |
+| '[fr-ner](https://huggingface.co/flair/ner-french)' | NER (4-class) | French | [WikiNER (aij-wikiner-fr-wp3)](https://github.com/dice-group/FOX/tree/master/input/Wikiner)  |  **95.57** (F1) | [mhham](https://github.com/mhham) |
+| '[es-ner-large](https://huggingface.co/flair/ner-spanish-large)' | NER (4-class) | Spanish | CoNLL-03  |  **90.54** (F1) | [mhham](https://github.com/mhham) |
+| '[nl-ner](https://huggingface.co/flair/ner-dutch)' | NER (4-class) | Dutch |  [CoNLL 2002](https://www.clips.uantwerpen.be/conll2002/ner/)  |  **92.58** (F1) |  |
+| '[nl-ner-large](https://huggingface.co/flair/ner-dutch-large)' | NER (4-class) | Dutch | Conll-03 |  **95.25** (F1) |  |
+| 'nl-ner-rnn' | NER (4-class) | Dutch | [CoNLL 2002](https://www.clips.uantwerpen.be/conll2002/ner/)  |  **90.79** (F1) | |
+| '[da-ner](https://huggingface.co/flair/ner-danish)' | NER (4-class) | Danish |  [Danish NER dataset](https://github.com/alexandrainst/danlp)  |   | [AmaliePauli](https://github.com/AmaliePauli) |
+| 'da-pos' | POS-tagging | Danish | [Danish Dependency Treebank](https://github.com/UniversalDependencies/UD_Danish-DDT/blob/master/README.md)  |  | [AmaliePauli](https://github.com/AmaliePauli) |
+| 'ml-pos' | POS-tagging | Malayalam | 30000 Malayalam sentences  | **83** | [sabiqueqb](https://github.com/sabiqueqb) |
+| 'ml-upos' | POS-tagging | Malayalam | 30000 Malayalam sentences | **87** | [sabiqueqb](https://github.com/sabiqueqb) |
+| 'pt-pos-clinical' | POS-tagging | Portuguese | [PUCPR](https://github.com/HAILab-PUCPR/portuguese-clinical-pos-tagger) | **92.39** | [LucasFerroHAILab](https://github.com/LucasFerroHAILab) for clinical texts |
+
+
+### 독일어 문장 태그 지정
+
+위 목록에 표시된 것처럼 영어 이외의 언어에 대한 사전 교육 모델도 제공합니다. 독일어 문장에 태그를 지정하려면 적절한 모델을 로드하면 됩니다.
+
+```python
+
+# load model
+tagger = SequenceTagger.load('de-ner')
+
+# make German sentence
+sentence = Sentence('George Washington ging nach Washington.')
+
+# predict NER tags
+tagger.predict(sentence)
+
+# print sentence with predicted tags
+print(sentence.to_tagged_string())
+```
+
+출력 결과: 
+```console
+George <B-PER> Washington <E-PER> ging nach Washington <S-LOC> .
+```
+
+### 아랍어 문장 태그 지정
+
+Flair는 또한 오른쪽에서 왼쪽으로 쓰는 언어에서도 작동합니다. 아랍어 문장에 태그를 지정하려면 적절한 모델을 로드하면 됩니다.
+```python
+
+# load model
+tagger = SequenceTagger.load('ar-ner')
+
+# make Arabic sentence
+sentence = Sentence("احب برلين")
+
+# predict NER tags
+tagger.predict(sentence)
+
+# print sentence with predicted tags
+for entity in sentence.get_labels('ner'):
+    print(entity)
+```
+
+출력 : 
+```console
+LOC [برلين (2)] (0.9803) 
+```
+
+### 다국어 텍스트 태그 지정
+
+여러 언어(예: 영어 및 독일어)의 텍스트가 있는 경우, 새로운 다국어 모델을 사용할 수 있습니다.
+
+```python
+
+# load model
+tagger = SequenceTagger.load('pos-multi')
+
+# text with English and German sentences
+sentence = Sentence('George Washington went to Washington. Dort kaufte er einen Hut.')
+
+# predict PoS tags
+tagger.predict(sentence)
+
+# print sentence with predicted tags
+print(sentence.to_tagged_string())
+```
+
+출력 결과: 
+```console
+George <PROPN> Washington <PROPN> went <VERB> to <ADP> Washington <PROPN> . <PUNCT>
+
+Dort <ADV> kaufte <VERB> er <PRON> einen <DET> Hut <NOUN> . <PUNCT>
+```
+
+그래서 이 문장에서는 'went'와 'kaufte'가 모두 동사로 식별됩니다.
+
+### 실험: 시맨틱 프레임 탐지
+
+영어의 경우 Propbank 3.0 프레임을 사용하여 학습된 텍스트 의미 프레임을 감지하는 사전 교육 모델을 제공합니다.
+이는 프레임을 환기하는 단어(frame evoking words)에 대해 일종의 단어 의미 중의성 해소(word sense disambiguation)를 제공합니다.
+
+예를 들어 보겠습니다.
+
+```python
+# load model
+tagger = SequenceTagger.load('frame')
+
+# make English sentence
+sentence_1 = Sentence('George returned to Berlin to return his hat.')
+sentence_2 = Sentence('He had a look at different hats.')
+
+# predict NER tags
+tagger.predict(sentence_1)
+tagger.predict(sentence_2)
+
+# print sentence with predicted tags
+print(sentence_1.to_tagged_string())
+print(sentence_2.to_tagged_string())
+```
+출력 결과: 
+
+```console
+George returned <return.01> to Berlin to return <return.02> his hat .
+
+He had <have.LV> a look <look.01> at different hats .
+```
+
+위에서 볼 수 있듯이, 프레임 탐지기는 문장 1에서 'return'이라는 단어의 두 가지 다른 의미를 구별합니다.
+'return.01'은 어떤 장소로 돌아가는 것을 의미하고, 'return.02'는 무언가를 돌려주는 것을 의미합니다.
+
+비슷하게, 문장 2에서 프레임 탐지기는 'have'가 경동사(light verb)이고
+'look'이 프레임을 환기하는 단어인 경동사 구문을 찾아냅니다.
+
+### 문장 목록 태그 지정
+
+종종 전체 텍스트 말뭉치에 태그를 지정해야 할 수 있습니다. 이 경우 말뭉치를 문장으로 나누고
+`Sentence` 객체 목록을 `.predict()` 메서드에 전달해야 합니다.
+
+예를 들어 segtok의 문장 분할기를 사용하여 텍스트를 분할할 수 있습니다.
+
+```python
+from flair.models import SequenceTagger
+from flair.tokenization import SegtokSentenceSplitter
+
+# example text with many sentences
+text = "This is a sentence. This is another sentence. I love Berlin."
+
+# initialize sentence splitter
+splitter = SegtokSentenceSplitter()
+
+# use splitter to split text into list of sentences
+sentences = splitter.split(text)
+
+# predict tags for sentences
+tagger = SequenceTagger.load('ner')
+tagger.predict(sentences)
+
+# iterate through sentences and print predicted labels
+for sentence in sentences:
+    print(sentence.to_tagged_string())
+```
+
+`.predict()` 메서드의 `mini_batch_size` 매개 변수를 사용하여 태거에 전달되는 미니 배치의 크기를 설정할 수 있습니다.
+리소스에 따라 이 매개 변수를 조정하여 속도를 최적화할 수 있습니다.
+
+
+## 사전 교육된 텍스트 분류 모델을 사용한 태그 지정
+
+긍정 또는 부정 의견을 탐지하기 위해 사전 훈련된 모델을 사용하겠습니다.
+이 모델은 제품 및 영화 리뷰 데이터셋을 혼합하여 훈련되었으며
+영어 텍스트에서 긍정적 감정과 부정적 감정을 인식할 수 있습니다.
+
+```python
+from flair.models import TextClassifier
+
+# load tagger
+classifier = TextClassifier.load('sentiment')
+```
+
+문장에 대해 분류기의 `predict()` 메서드를 호출하기만 하면 됩니다. 그러면 예측된 레이블이 문장에 추가됩니다. 긍정적인 감정의 문장을 사용해 봅시다.
+
+```python
+# make example sentence
+sentence = Sentence("enormously entertaining for moviegoers of any age.")
+
+# call predict
+classifier.predict(sentence)
+
+# check prediction
+print(sentence)
+```
+
+출력 결과:
+```console
+Sentence: "enormously entertaining for moviegoers of any age."   [− Tokens: 8  − Sentence-Labels: {'class': [POSITIVE (0.9976)]}]
+```
+
+POSITIVE라는 라벨이 문장에 추가되어 이 문장이 긍정적인 감정을 가지고 있음을 나타냅니다.
+
+### 사전 교육 텍스트 분류 모델 목록
+
+`TextClassifier` 클래스의 `load()` 메서드에 적절한 문자열을 전달하여
+로드할 사전 훈련 모델을 선택합니다. 현재 다음과 같은 사전 훈련 모델이 제공됩니다:
+
+| ID | 언어 | 태스크 | 훈련 데이터셋 | 정확도 |
+| ------------- | ---- | ------------- |------------- |------------- |
+| 'sentiment' | English | detecting positive and negative sentiment (transformer-based) | movie and product reviews |  **98.87** |
+| 'sentiment-fast' | English | detecting positive and negative sentiment (RNN-based) | movie and product reviews |  **96.83**|
+| 'communicative-functions' | English | detecting function of sentence in research paper (BETA) | scholarly papers |  |
+| 'de-offensive-language' | German | detecting offensive language | [GermEval 2018 Task 1](https://projects.fzai.h-da.de/iggsa/projekt/) |  **75.71** (Macro F1) |
+
+## 교육 데이터 없이 새 클래스 태그 지정
+
+위 모델에 포함되지 않은 클래스에 레이블을 지정해야 하는 경우
+사전 훈련된 제로샷 분류기 TARS를 사용해 볼 수 있습니다
+([제로샷 튜토리얼](/resources/docs/TUTORIAL_10_TRAINING_ZERO_SHOT_MODEL.md)로 건너뛰기).
+TARS는 임의의 클래스에 대해 텍스트 분류를 수행할 수 있습니다.
+
+## 다음
+
+이제 텍스트를 임베딩하기 위해 다양한 [워드 임베딩](/resources/docs/TUTORIAL_3_WORD_EMBEDDING.md)을 사용하는 방법에 대해 알아보겠습니다.

From 013f01531916c2f879c99ef43f5002125dc6316b Mon Sep 17 00:00:00 2001
From: Taehyuny <31824443+Taehyuny@users.noreply.github.com>
Date: Sat, 13 Nov 2021 13:34:30 +0900
Subject: [PATCH 03/30] Create TUTORIAL_3_WORD_EMBEDDING.md

---
 .../KOR_docs/TUTORIAL_3_WORD_EMBEDDING.md     | 153 ++++++++++++++++++
 1 file changed, 153 insertions(+)
 create mode 100644 resources/docs/KOR_docs/TUTORIAL_3_WORD_EMBEDDING.md

diff --git a/resources/docs/KOR_docs/TUTORIAL_3_WORD_EMBEDDING.md b/resources/docs/KOR_docs/TUTORIAL_3_WORD_EMBEDDING.md
new file mode 100644
index 000000000..af49ca953
--- /dev/null
+++ b/resources/docs/KOR_docs/TUTORIAL_3_WORD_EMBEDDING.md
@@ -0,0 +1,153 @@
+# 튜토리얼 3: 워드 임베딩
+
+문장의 단어를 다양한 방법으로 임베딩할 수 있는 일련의 클래스를 제공합니다.
+그 이전의 튜토리얼 1,2에 대해 어느 정도 학습하셨다고 가정하고 진행하겠습니다.
+
+
+## 임베딩
+
+모든 단어 임베딩 클래스는 `TokenEmbeddings` 클래스를 상속하며, 텍스트를 임베딩하기 위해 호출하는 `embed()` 메서드를 구현합니다.
+필요한 임베딩 클래스를 인스턴스화하고 `embed()`를 호출하여 텍스트를 임베딩하면 됩니다.
+우리의 방법으로 생산된 모든 임베딩은 PyTorch 벡터이기 때문에 즉시 훈련에 사용될 수 있고 미세 조정이 가능합니다.
+
+이 튜토리얼에서는 몇 가지 일반적인 임베딩을 소개하고 사용 방법을 보여줍니다. 
+이러한 임베딩에 대한 자세한 내용과 지원되는 모든 임베딩에 대한 개요는 [여기](/resources/docs/TUTORIAL_4_ELMO_BERT_FLAIR_EMBEDDING.md)를 참조하세요.
+
+## 클래식 워드 임베딩
+
+고전적인 단어 임베딩은 정적(static)이며 단어 수준(word-level)입니다. 이는 각 개별 단어가 정확히 하나의 사전 계산된 임베딩을 얻는다는 것을 의미합니다.
+널리 사용되는 GloVe 또는 Komninos 임베딩을 포함한 대부분의 임베딩이 이 클래스에 속합니다.
+
+우선 `WordEmbeddings` 클래스를 인스턴스화하고 로드할 임베딩의 문자열 식별자를 전달합니다.
+GloVe 임베딩을 사용하려면 'glove' 문자열을 생성자에 전달하십시오.
+
+```python
+from flair.embeddings import WordEmbeddings
+from flair.data import Sentence
+
+# init embedding
+glove_embedding = WordEmbeddings('glove')
+```
+
+이제 예제 문장을 만들고 임베딩의 `embed()` 메서드를 호출합니다. 일부 임베딩 유형은 속도를 높이기 위해 배치를 사용하기 때문에 문장 목록을 이 메서드에 전달할 수도 있습니다.
+
+```python
+# create sentence.
+sentence = Sentence('The grass is green .')
+
+# embed a sentence using glove.
+glove_embedding.embed(sentence)
+
+# now check out the embedded tokens.
+for token in sentence:
+    print(token)
+    print(token.embedding)
+```
+
+
+이렇게 하면 토큰과 임베딩이 출력됩니다. GloVe 임베딩은 100차원의 PyTorch 벡터입니다.
+
+`WordEmbeddings` 클래스의 생성자에 적절한 ID 문자열을 전달하여
+로드할 사전 훈련된 임베딩을 선택합니다. 일반적으로
+**두 글자로 된 언어 코드**를 사용하여 임베딩을 초기화하므로, 영어는 'en',
+독일어는 'de'와 같이 지정합니다. 기본적으로 Wikipedia를 통해 학습된 FastText 임베딩이 초기화됩니다.
+또한 '-crawl'로 인스턴스화하여 웹 크롤을 통해 언제든지 FastText 임베딩을 사용할 수 있습니다. 
+따라서 독일 웹 크롤을 통해 학습된 임베딩을 사용하기 위해 'de-crawl'을 사용합니다.
+
+```python
+german_embedding = WordEmbeddings('de-crawl')
+```
+
+이 클래스에 대한 자세한 설명과 모든 워드 임베딩 모델의 전체 목록은 [여기](/resources/docs/embeddings/CLASSIC_WORD_EMBEDDINGS.md)에서 확인할 수 있습니다.
+일반적으로 FastText 임베딩 또는 GloVe를 사용하는 것이 좋습니다.
+
+
+## Flair 임베딩
+
+문맥적 문자열 임베딩(contextual string embeddings)은 표준 단어 임베딩을 넘어서는 잠재적인 구문-의미 정보를 포착하는
+[강력한 임베딩](https://www.aclweb.org/anthology/C18-1139/)입니다. 주요 차이점은 다음과 같습니다.
+(1) 단어에 대한 명확한 개념 없이 훈련되고 따라서 기본적으로 단어를 문자 시퀀스로 모델링합니다. 
+(2) 주변 텍스트에 의해 **contextualized**됩니다. 이는 *동일 단어의 문맥적 용도에 따라* 다른 임베딩이 있음을 의미합니다.
+
+Flair를 사용할 때, 표준 단어 임베딩과 같은 적절한 임베딩 클래스를 인스턴스화하기만 하면 이러한 임베딩을 사용할 수 있습니다.
+
+```python
+from flair.embeddings import FlairEmbeddings
+
+# init embedding
+flair_embedding_forward = FlairEmbeddings('news-forward')
+
+# create a sentence
+sentence = Sentence('The grass is green .')
+
+# embed words in sentence
+flair_embedding_forward.embed(sentence)
+```
+
+`FlairEmbeddings` 클래스의 생성자에 적절한 문자열을 전달하여 로드할 임베딩을 선택합니다.
+지원되는 모든 언어에는 순방향(forward) 및 역방향(backward) 모델이 있습니다.
+**2글자 언어 코드** 뒤에 하이픈과 **forward** 또는 **backward**를 붙여 해당 언어의 모델을 로드할 수 있습니다.
+독일어 Flair 모델의 순방향 및 역방향 모델을 로드하려면 다음과 같이 하십시오.
+
+```python
+# init forward embedding for German
+flair_embedding_forward = FlairEmbeddings('de-forward')
+flair_embedding_backward = FlairEmbeddings('de-backward')
+```
+
+사전 훈련된 모든 `FlairEmbeddings` 모델의 전체 목록과 표준 사용법에 대한 자세한 정보는 [여기](/resources/docs/embeddings/FLAIR_EMBEDDINGS.md)에서 확인하십시오.
+
+## 스택 임베딩
+
+스택형 임베딩은 이 라이브러리의 가장 중요한 개념 중 하나입니다. 예를 들어 전통적인 임베딩과 문맥적 문자열 임베딩을 함께 사용하려는 경우, 스택형 임베딩을 사용하여 서로 다른 임베딩을 결합할 수 있습니다.
+스택형 임베딩을 사용하면 여러 임베딩을 자유롭게 조합할 수 있습니다.
+
+`StackedEmbeddings` 클래스에 결합하고자 하는 임베딩 목록을 전달하여 인스턴스화하기만 하면 됩니다.
+
+예를 들어, 고전적인 GloVe 임베딩을 전방 및 후방 Flair 임베딩과 결합해봅시다. 이는 특히 시퀀스 라벨링에 대해 일반적으로 권장하는 조합입니다.
+
+먼저 결합할 세 개의 임베딩을 인스턴스화합니다.
+
+```python
+from flair.embeddings import WordEmbeddings, FlairEmbeddings
+
+# init standard GloVe embedding
+glove_embedding = WordEmbeddings('glove')
+
+# init Flair forward and backwards embeddings
+flair_embedding_forward = FlairEmbeddings('news-forward')
+flair_embedding_backward = FlairEmbeddings('news-backward')
+```
+
+이제 `StackedEmbeddings` 클래스를 인스턴스화하고 세 개의 임베딩이 포함된 목록을 전달합니다.
+
+```python
+from flair.embeddings import StackedEmbeddings
+
+# create a StackedEmbedding object that combines glove and forward/backward flair embeddings
+stacked_embeddings = StackedEmbeddings([
+                                        glove_embedding,
+                                        flair_embedding_forward,
+                                        flair_embedding_backward,
+                                       ])
+```
+
+이제 이 임베딩을 다른 모든 임베딩과 마찬가지로 사용하면 됩니다. 즉, 문장에 대해 `embed()` 메서드를 호출합니다.
+
+```python
+sentence = Sentence('The grass is green .')
+
+# just embed a sentence using the StackedEmbedding as you would with any single embedding.
+stacked_embeddings.embed(sentence)
+
+# now check out the embedded tokens.
+for token in sentence:
+    print(token)
+    print(token.embedding)
+```
+
+단어들은 이제 세 가지 다른 임베딩을 연결(concatenation)한 값으로 임베딩됩니다. 이는 결과 임베딩 벡터가 여전히 단일 PyTorch 벡터임을 의미합니다.
+
+## 다음 튜토리얼
+이러한 임베딩에 대한 자세한 내용과 지원되는 모든 단어 임베딩에 대한 전체 개요를 보려면 다음을 참조하십시오.
+[튜토리얼 4](/resources/docs/TUTORIAL_4_ELMO_BERT_FLAIR_EMBEDDING.md).

From 1bb4be8b5b175c1e7b7ac9b7940f00b747493733 Mon Sep 17 00:00:00 2001
From: Taehyuny <31824443+Taehyuny@users.noreply.github.com>
Date: Sat, 13 Nov 2021 13:34:50 +0900
Subject: [PATCH 04/30] Create TUTORIAL_4_ELMO_BERT_FLAIR_EMBEDDING.md

---
 .../TUTORIAL_4_ELMO_BERT_FLAIR_EMBEDDING.md   | 64 +++++++++++++++++++
 1 file changed, 64 insertions(+)
 create mode 100644 resources/docs/KOR_docs/TUTORIAL_4_ELMO_BERT_FLAIR_EMBEDDING.md

diff --git a/resources/docs/KOR_docs/TUTORIAL_4_ELMO_BERT_FLAIR_EMBEDDING.md b/resources/docs/KOR_docs/TUTORIAL_4_ELMO_BERT_FLAIR_EMBEDDING.md
new file mode 100644
index 000000000..48dd4ee27
--- /dev/null
+++ b/resources/docs/KOR_docs/TUTORIAL_4_ELMO_BERT_FLAIR_EMBEDDING.md
@@ -0,0 +1,64 @@
+# 튜토리얼 4 : Word Embedding의 종류
+이번 챕터는 튜토리얼이라기보다 Flair에서 지원하는 Embedding의 종류를 소개합니다. 아래 테이블의 Embedding을 클릭해 사용법을 볼 수 있습니다. 설명들은 [base types](/resources/docs/TUTORIAL_1_BASICS.md)과 [standard word embeddings](/resources/docs/TUTORIAL_3_WORD_EMBEDDING.md), 그리고 `StackedEmbeddings`클래스에 익숙하다는 전제로 작성되어 있습니다.
+
+## 개요
+모든 word embedding 클래스들은 `TokenEmbeddings` 클래스를 상속하고 있으며 텍스트를 임베드 하기 위해 `embed()` 메소드를 호출합니다. Flair를 사용하는 대부분의 경우 다양하고 복잡한 embedding 과정이 인터페이스 뒤로 숨겨져 있습니다. 사용자는 단순히 필요한 embedding 클래스를 인스턴스화하고 `embed()`를 호출해 텍스트를 임베드 하면 됩니다.
+
+현재 지원하고 있는 임베딩의 종류입니다 :
+
+| Class | Type | Paper | 
+| ------------- | -------------  | -------------  | 
+| [`BytePairEmbeddings`](https://github.com/flairNLP/flair/tree/master/resources/docs/embeddings/BYTE_PAIR_EMBEDDINGS.md) | Subword-level word embeddings | [Heinzerling and Strube (2018)](https://www.aclweb.org/anthology/L18-1473)  |
+| [`CharacterEmbeddings`](https://github.com/flairNLP/flair/tree/master/resources/docs/embeddings/CHARACTER_EMBEDDINGS.md) | Task-trained character-level embeddings of words | [Lample et al. (2016)](https://www.aclweb.org/anthology/N16-1030) |
+| [`ELMoEmbeddings`](https://github.com/flairNLP/flair/tree/master/resources/docs/embeddings/ELMO_EMBEDDINGS.md) | Contextualized word-level embeddings | [Peters et al. (2018)](https://aclweb.org/anthology/N18-1202)  |
+| [`FastTextEmbeddings`](https://github.com/flairNLP/flair/tree/master/resources/docs/embeddings/FASTTEXT_EMBEDDINGS.md) | Word embeddings with subword features | [Bojanowski et al. (2017)](https://aclweb.org/anthology/Q17-1010)  |
+| [`FlairEmbeddings`](https://github.com/flairNLP/flair/tree/master/resources/docs/embeddings/FLAIR_EMBEDDINGS.md) | Contextualized character-level embeddings | [Akbik et al. (2018)](https://www.aclweb.org/anthology/C18-1139/)  |
+| [`OneHotEmbeddings`](https://github.com/flairNLP/flair/tree/master/resources/docs/embeddings/ONE_HOT_EMBEDDINGS.md) | Standard one-hot embeddings of text or tags | - |
+| [`PooledFlairEmbeddings`](https://github.com/flairNLP/flair/tree/master/resources/docs/embeddings/FLAIR_EMBEDDINGS.md) | Pooled variant of `FlairEmbeddings` |  [Akbik et al. (2019)](https://www.aclweb.org/anthology/N19-1078/)  | 
+| [`TransformerWordEmbeddings`](https://github.com/flairNLP/flair/tree/master/resources/docs/embeddings/TRANSFORMER_EMBEDDINGS.md) | Embeddings from pretrained [transformers](https://huggingface.co/transformers/pretrained_models.html) (BERT, XLM, GPT, RoBERTa, XLNet, DistilBERT etc.) | [Devlin et al. (2018)](https://www.aclweb.org/anthology/N19-1423/) [Radford et al. (2018)](https://d4mucfpksywv.cloudfront.net/better-language-models/language_models_are_unsupervised_multitask_learners.pdf)  [Liu et al. (2019)](https://arxiv.org/abs/1907.11692) [Dai et al. (2019)](https://arxiv.org/abs/1901.02860) [Yang et al. (2019)](https://arxiv.org/abs/1906.08237) [Lample and Conneau (2019)](https://arxiv.org/abs/1901.07291) |  
+| [`WordEmbeddings`](https://github.com/flairNLP/flair/tree/master/resources/docs/embeddings/CLASSIC_WORD_EMBEDDINGS.md) | Classic word embeddings |  |
+
+## BERT와 Flair 조합하기
+우리는 Flair, ELMo, BERT 그리고 고전적 word embedding을 쉽게 결합할 수 있습니다. 조합하려는 임베딩을 각각 인스턴스화하고 `StackedEmbeddings`에서 사용하면 됩니다.
+아래는 다국어 Flair와 BERT 임베딩을 사용해 강력한 다국어 다운스트림 작업 모델을 훈련하는 예시입니다.
+
+우선 조합하고자 하는 임베딩을 인스턴스화합니다.
+```python
+from flair.embeddings import FlairEmbeddings, TransformerWordEmbeddings
+
+# Flair 임베딩 초기화
+flair_forward_embedding = FlairEmbeddings('multi-forward')
+flair_backward_embedding = FlairEmbeddings('multi-backward')
+
+# 다국어 BERT 초기화
+bert_embedding = TransformerWordEmbeddings('bert-base-multilingual-cased')
+```
+
+이제 `StackedEmbeddings` 클래스를 초기화 하고 앞에서 초기화한 세가지 임베딩이 포함된 목록을 전달합니다.
+
+```python
+from flair.embeddings import StackedEmbeddings
+
+# 앞에서 초기화 한 임베딩을 결합한 StackedEmbedding 객체를 생성합니다.
+stacked_embeddings = StackedEmbeddings(
+    embeddings=[flair_forward_embedding, flair_backward_embedding, bert_embedding])
+```
+
+완성입니다! 다른 임베딩을 사용하는 것과 마찬가지로 문장에 대해 `embed()` 메서드를 호출하면 됩니다.
+
+```python
+sentence = Sentence('The grass is green .')
+
+# 단일 임베딩을 사용하는 것과 마찬가지로 StackedEmbedding을 사용합니다.
+stacked_embeddings.embed(sentence)
+
+# 문장에 대한 Token을 확인합니다.
+for token in sentence:
+    print(token)
+    print(token.embedding)
+```
+
+단어들은 세 가지 다른 임베딩이 조합된 것으로 임베드 되었습니다. output은 여전히 PyTorch 벡터입니다.
+
+## 다음 튜토리얼
+텍스트 분류와 같은 작업을 위해 전체 텍스트 [문서를 임베드](/resources/docs/TUTORIAL_5_DOCUMENT_EMBEDDINGS.md)하는 튜토리얼 혹은 [나만의 모델을 훈련](/resources/docs/TUTORIAL_7_TRAINING_A_MODEL.md)하기 위한 전제조건인 [말뭉치(corpus)를 로드](/resources/docs/TUTORIAL_6_CORPUS.md)하는 튜토리얼이 준비되어 있습니다.

From ac5a78f4dca8c83c8d447c3d1c11e649bf0ce12f Mon Sep 17 00:00:00 2001
From: Taehyuny <31824443+Taehyuny@users.noreply.github.com>
Date: Sat, 13 Nov 2021 13:35:06 +0900
Subject: [PATCH 05/30] Create TUTORIAL_5_DOCUMENT_EMBEDDINGS.md

---
 .../TUTORIAL_5_DOCUMENT_EMBEDDINGS.md         | 132 ++++++++++++++++++
 1 file changed, 132 insertions(+)
 create mode 100644 resources/docs/KOR_docs/TUTORIAL_5_DOCUMENT_EMBEDDINGS.md

diff --git a/resources/docs/KOR_docs/TUTORIAL_5_DOCUMENT_EMBEDDINGS.md b/resources/docs/KOR_docs/TUTORIAL_5_DOCUMENT_EMBEDDINGS.md
new file mode 100644
index 000000000..5c214aa6c
--- /dev/null
+++ b/resources/docs/KOR_docs/TUTORIAL_5_DOCUMENT_EMBEDDINGS.md
@@ -0,0 +1,132 @@
+# 튜토리얼 5: 문서 임베딩
+우리가 앞서 살펴본 [단어 임베딩](/resources/docs/TUTORIAL_3_WORD_EMBEDDING.md)은 개별 단어에 대한 임베딩을 제공했습니다. 이번에 살펴볼 문서 임베딩은 전체 텍스트에 대해 하나의 임베딩을 제공합니다.
+
+이번 튜토리얼은 여러분이 라이브러리의 [기본 유형](/resources/docs/TUTORIAL_1_BASICS.md)과 [단어 임베딩](/resources/docs/TUTORIAL_3_WORD_EMBEDDING.md)이 어떻게 동작하는지 익숙하다 가정하고 진행됩니다.
+
+## 임베딩
+모든 문서 임베딩 클래스들은 `DocumentEmbeddings` 클래스를 상속하며 텍스트를 임베드 하기 위해 `embed()` 메소드를 호출합니다.
+Flair를 사용하는 대부분의 경우 다양하고 복잡한 embedding 과정이 인터페이스 뒤로 숨겨져 있습니다.
+
+Flair에는 4가지 주요 문서 임베딩이 있습니다.
+
+ 1. `DocumentPoolEmbeddings` 문장 속 모든 단어의 평균을 단순하게 도출합니다.
+ 2. `DocumentRNNEmbeddings` 문장 속 모든 단어들로 RNN을 훈련시킵니다.
+ 3. `TransformerDocumentEmbeddings` 미리 훈련된 변환기를 사용합니다. 대부분의 텍스트 분류 작업에 **권장**합니다.
+ 4. `SentenceTransformerDocumentEmbeddings` 미리 훈련된 변환기를 사용합니다. 문장의 벡터 표현을 필요로 할 때 *권장*합니다.
+
+네 가지 옵션 중 하나를 선택해 초기화하고 `embed()` 메서드를 호출해 텍스트를 임베드 합니다.
+
+아래는 네 가지 문서 임베딩에 대한 세부정보입니다 :
+
+## Document Pool Embeddings
+문서 임베딩 중 가장 단순한 유형입니다. 전체 문장에 대한 임베딩을 얻기 위해 문장 속 모든 단어 임베딩에 대해 풀링 연산을 합니다.
+디폴트는 평균 풀링입니다. 이는 모든 단어 임베딩의 평균을 사용합니다.
+
+인스턴스화 하기 위해 다음과 같은 임베딩 리스트를 사용합니다.
+```python
+from flair.embeddings import WordEmbeddings, DocumentPoolEmbeddings
+
+# 워드 임베딩을 초기화합니다.
+glove_embedding = WordEmbeddings('glove')
+
+# 문서 임베딩을 초기화합니다. mode = mean
+document_embeddings = DocumentPoolEmbeddings([glove_embedding])
+```
+이제 `embed()` 메소드를 호출해 문장을 임베드 합니다.
+```python
+# 예시 문장입니다.
+sentence = Sentence('The grass is green . And the sky is blue .')
+
+# 문서 임베딩에 문장을 임베드합니다.
+document_embeddings.embed(sentence)
+
+# 임베드된 문장을 확인합니다.
+print(sentence.embedding)
+```
+문서 임베딩은 단어 임베딩에서 파생됩니다. 그렇기 때문에 단어 임베딩의 차원에 따라 문서의 차원이 달라집니다. 더 자세한 내용은 [여기](https://github.com/flairNLP/flair/blob/master/resources/docs/embeddings/DOCUMENT_POOL_EMBEDDINGS.md)를 참조해주세요.
+
+`DocumentPoolEmbeddings`은 훈련할 필요가 없으며 즉각적으로 문서를 임베딩해 사용할 수 있는 장점이 있습니다.
+
+## Document RNN Embeddings
+RNN 임베딩을 사용하기 위해 문장의 모든 단어에 대해 RNN을 실행하고 RNN의 최종 state를 전체 문서에 대한 임베딩으로 사용합니다.
+이를 사용하려면 토큰 임베딩 목록을 전달하여 `DocumentRNNEmbeddings`를 초기화합니다.
+
+```python
+from flair.embeddings import WordEmbeddings, DocumentRNNEmbeddings
+
+glove_embedding = WordEmbeddings('glove')
+
+document_embeddings = DocumentRNNEmbeddings([glove_embedding])
+```
+디폴트로 GRU-type RNN이 인스턴스화됩니다. 예제 문장을 만들고 임베딩의 `embed()` 메소드를 호출합니다.
+
+```python
+# 예시 문장입니다.
+sentence = Sentence('The grass is green . And the sky is blue .')
+
+# 문서 임베딩에 문장을 임베드합니다.
+document_embeddings.embed(sentence)
+
+# 임베드된 문장을 확인합니다.
+print(sentence.get_embedding())
+```
+결과물은 전체 문장에 대한 단일 임베딩입니다. 임베딩 차원은 hidden state의 개수와 RNN이 양방향인지 아닌지에 따라 달라집니다. 더 자세한 내용은 [여기](https://github.com/flairNLP/flair/blob/master/resources/docs/embeddings/DOCUMENT_RNN_EMBEDDINGS.md)를 참조해주세요.
+
+**주의** RNN 임베딩을 초기화하면 RNN 가중치가 무작위로 초기화됩니다! 사용을 위해서 사용자의 의도에 알맞게 훈련돼야 합니다.
+
+## TransformerDocumentEmbeddings
+이미 훈련된 [변환기](https://github.com/huggingface/transformers)를 통해 전체 문장을 임베딩합니다. 임베딩의 식별자를 통해 다른 변환기를 사용할 수 있습니다.
+
+표준 BERT 변환 모델의 예시입니다:
+```python
+from flair.embeddings import TransformerDocumentEmbeddings
+
+# 임베딩을 초기화합니다.
+embedding = TransformerDocumentEmbeddings('bert-base-uncased')
+
+# 예시 문장입니다.
+sentence = Sentence('The grass is green .')
+
+# 문장을 임베딩합니다.
+embedding.embed(sentence)
+```
+
+RoBERTa의 예시입니다:
+```python
+from flair.embeddings import TransformerDocumentEmbeddings
+
+# 임베딩을 초기화합니다.
+embedding = TransformerDocumentEmbeddings('roberta-base')
+
+# 예시 문장입니다.
+sentence = Sentence('The grass is green .')
+
+# 문장을 임베딩합니다.
+embedding.embed(sentence)
+```
+
+[여기](https://huggingface.co/transformers/pretrained_models.html)에서 모든 모델들의 리스트를 확인할 수 있습니다(BERT, RoBERTa, XLM, XLNet 기타 등등). 이 클래스를 통해 모델들을 사용할 수 있습니다.
+
+## SentenceTransformerDocumentEmbeddings
+[`sentence-transformer`](https://github.com/UKPLab/sentence-transformers) 라이브러리에서 다른 임베딩을 사용할 수도 있습니다. 이 모델들은 사전 훈련된 것으로 범용 벡터 표현을 제공합니다.
+```python
+from flair.data import Sentence
+from flair.embeddings import SentenceTransformerDocumentEmbeddings
+
+# 임베딩을 초기화합니다.
+embedding = SentenceTransformerDocumentEmbeddings('bert-base-nli-mean-tokens')
+
+# 예시 문장입니다.
+sentence = Sentence('The grass is green .')
+
+# 문장을 임베딩합니다.
+embedding.embed(sentence)
+```
+[여기](https://docs.google.com/spreadsheets/d/14QplCdTCDwEmTqrn1LH4yrbKvdogK4oQvYO1K1aPR5M/edit#gid=0) 에서 사전 훈련된 모델의 전체 리스트를 확인할 수 있습니다.
+
+**참고**: 이 임베딩을 사용하기 위해서 `sentence-transformers`를 설치해야 합니다.
+
+`pip install sentence-transformers`. 
+
+## 다음 튜토리얼
+[나만의 모델을 훈련](/resources/docs/TUTORIAL_7_TRAINING_A_MODEL.md)하기 위한 전제조건인 [말뭉치(corpus)를 로드](/resources/docs/TUTORIAL_6_CORPUS.md)하는 튜토리얼이 준비되어 있습니다.

From ab9d1d1833517a5d4ec7d4a0e2404650b3982fcf Mon Sep 17 00:00:00 2001
From: Taehyuny <31824443+Taehyuny@users.noreply.github.com>
Date: Sat, 13 Nov 2021 13:35:28 +0900
Subject: [PATCH 06/30] Create TUTORIAL_6_CORPUS.md

---
 resources/docs/KOR_docs/TUTORIAL_6_CORPUS.md | 486 +++++++++++++++++++
 1 file changed, 486 insertions(+)
 create mode 100644 resources/docs/KOR_docs/TUTORIAL_6_CORPUS.md

diff --git a/resources/docs/KOR_docs/TUTORIAL_6_CORPUS.md b/resources/docs/KOR_docs/TUTORIAL_6_CORPUS.md
new file mode 100644
index 000000000..cd510015f
--- /dev/null
+++ b/resources/docs/KOR_docs/TUTORIAL_6_CORPUS.md
@@ -0,0 +1,486 @@
+# 튜토리얼 6 : 훈련 데이터 불러오기
+이번 튜토리얼은 모델을 훈련하기 위해 말뭉치(corpus)를 로드하는 내용을 다룹니다. 
+이번 튜토리얼은 여러분이 라이브러리의 [기본 유형](/resources/docs/TUTORIAL_1_BASICS.md)에 익숙하다 가정하고 진행됩니다.
+
+## 말뭉치 오브젝트
+`corpus`는 모델을 훈련하는데 사용되는 데이터 세트입니다. 이는 훈련 문장 목록, 개발(dev) 문장 목록, 테스트 문장 목록으로 구성되며, 각각 모델 훈련 중의 훈련, 검증, 테스트 분할에 해당합니다.
+
+다음 예제는 the Universal Dependency Treebank for English를 말뭉치 오브젝트로 초기화하는 코드입니다.
+```python
+import flair.datasets
+corpus = flair.datasets.UD_ENGLISH()
+```
+위 코드를 처음 실행한다면 the Universal Dependency Treebank for English를 하드디스크에 다운로드합니다.
+그 다음 훈련, 테스트, 개발 split을 읽어 `corpus`에 담습니다. 아래 코드를 통해 각각의 split에 몇개의 문장이 들어있는지 확인할 수 있습니다.
+```python
+# 몇개의 문장이 train split에 있는지 출력합니다.
+print(len(corpus.train))
+
+# 몇개의 문장이 test split에 있는지 출력합니다.
+print(len(corpus.test))
+
+# 몇개의 문장이 dev split에 있는지 출력합니다.
+print(len(corpus.dev))
+```
+
+각 split의 객체에 직접 접근할 수 있습니다. 아래의 코드는 test split의 처음 문장을 출력합니다 :
+```python
+# test split의 처음 문장을 출력합니다.
+print(corpus.test[0])
+```
+결과입니다 : 
+```console
+Sentence: "What if Google Morphed Into GoogleOS ?" - 7 Tokens
+```
+
+이 문장은 통사적, 형태학적 정보가 tag되어 있습니다. POS 태그를 사용해 문장을 인쇄해보겠습니다 :
+```python
+# print the first Sentence in the test split
+print(corpus.test[0].to_tagged_string('pos'))
+```
+결과입니다 : 
+```console
+What <WP> if <IN> Google <NNP> Morphed <VBD> Into <IN> GoogleOS <NNP> ? <.>
+```
+이 말뭉치는 tag되어 있고 훈련에 사용할 수 있습니다.
+
+### 도움을 주는 함수들
+`corpus`는 유용한 도움 함수들이 많이 포함되어 있습니다. `downsample()`을 호출하고 비율을 정해 데이터를 다운샘플링 할 수 있습니다. 
+우선 말뭉치를 얻습니다.
+```python
+import flair.datasets
+corpus = flair.datasets.UD_ENGLISH()
+```
+그리고 말뭉치를 다운샘플링합니다.
+```python
+import flair.datasets
+downsampled_corpus = flair.datasets.UD_ENGLISH().downsample(0.1)
+```
+두 말뭉치를 출력하면 두 번째 말뭉치가 원본의 10%로 다운샘플링된 것을 확인할 수 있습니다.
+```python
+print("--- 1 Original ---")
+print(corpus)
+
+print("--- 2 Downsampled ---")
+print(downsampled_corpus)
+```
+결과입니다 :
+```console
+--- 1 Original ---
+Corpus: 12543 train + 2002 dev + 2077 test sentences
+
+--- 2 Downsampled ---
+Corpus: 1255 train + 201 dev + 208 test sentences
+```
+
+### 레이블 사전 만들기
+다수의 경우 예측할 레이블이 포함되어 있는 "사전"이 필요합니다. `make_label_dictionary` 메소드를 호출하고 `label_type`을 전달해 `corpus`에서 바로 사전을 만들 수 있습니다.
+
+예를 들어, 위에서 인스턴스화된 UD_ENGLISH 말뭉치들은 일반 POS tags('pos'), 범용 POS tags('upos'), 형태학적 tags('tense', 'number'...) 등 여러 레이어의 주석을 가지고 있습니다. 다음 코드는 `label_type='upos'`를 인자로 사용하는 예시입니다.
+```python
+# 범용 POS tag 작업에 대한 레이블 사전을 만듭니다.
+upos_dictionary = corpus.make_label_dictionary(label_type='upos')
+
+# 사전을 출력합니다.
+print(upos_dictionary)
+```
+결과입니다 :
+```console
+Dictionary with 17 tags: PROPN, PUNCT, ADJ, NOUN, VERB, DET, ADP, AUX, PRON, PART, SCONJ, NUM, ADV, CCONJ, X, INTJ, SYM
+```
+
+#### 다른 레이블 유형에 대한 사전
+위의 예에서 `make_label_dictionary`를 호출하면 동일한 말뭉치에 있는 모든 레이블 유형에 대한 통계가 인쇄됩니다.
+```console
+Corpus contains the labels: upos (#204585), lemma (#204584), pos (#204584), dependency (#204584), number (#68023), verbform (#35412), prontype (#33584), person (#21187), tense (#20238), mood (#16547), degree (#13649), definite (#13300), case (#12091), numtype (#4266), gender (#4038), poss (#3039), voice (#1205), typo (#332), abbr (#126), reflex (#100), style (#33), foreign (#18)
+```
+UD_ENGLISH 말뭉치는 이런 레이블을 가지고 있으며 이에 대한 사전을 만들 수 있습니다. 아래의 예시는 일반 POS tags와 형태학적 숫자 tags에 관한 사전을 만드는 예시입니다.
+```python
+# 일반 POS tags를 위한 사전을 만듭니다.
+pos_dictionary = corpus.make_label_dictionary(label_type='pos')
+
+# 형태학적 숫자 tags를 위한 사전을 만듭니다.
+number_dictionary = corpus.make_label_dictionary(label_type='number')
+```
+만약 위 사전을 출력한다면 POS 사전에는 50개의 태그가 있고, 숫자 사전에는 2개의 태그(단수 및 복수)만 포함되어 있음을 확인할 수 있습니다.
+
+#### 다른 말뭉치를 위한 사전
+`make_label_dictionary` 메소드는 텍스트 분류 말뭉치를 포함하여 모든 말뭉치에 사용할 수 있습니다 :
+```python
+# 텍스트 분류 작업을 위해 레이블 사전을 만듭니다.
+corpus = flair.datasets.TREC_6()
+print(corpus.make_label_dictionary('question_class'))
+```
+
+### 다중 말뭉치(MultiCorpus) 오브젝트
+한 번에 여러 태스크를 훈련시키려면 `MultiCorpus` 오브젝트를 사용하세요. `MultiCorpus`를 초기화하기 위해선 원하는 만큼의 `Corpus`오브젝트를 먼저 만들어야 합니다. 그 다음 `Corpus` 리스트를 `MultiCorpus` 오브젝트에 넘겨주면 됩니다. 아래 코드는 Universal Dependency Treebanks 형식의 영어, 독일어, 네덜란드어가 결합된 말뭉치를 로드합니다 :
+```python
+english_corpus = flair.datasets.UD_ENGLISH()
+german_corpus = flair.datasets.UD_GERMAN()
+dutch_corpus = flair.datasets.UD_DUTCH()
+
+# 세 언어로 구성된 다중 말뭉치를 만듭니다.
+from flair.data import MultiCorpus
+multi_corpus = MultiCorpus([english_corpus, german_corpus, dutch_corpus])
+```
+`MultiCorpus`는 `Corpus`를 상속하기 때문에 모델을 훈련하는 데 사용할 수 있습니다.
+
+## Flair가 포함하고 있는 데이터셋
+Flair는 많은 데이터셋을 지원합니다. 사용자가 해당 생성자 ID를 처음 호출할 때 자동으로 데이터를 다운로드하고 설정합니다.
+
+아래는 지원되는 데이터셋입니다. (**클릭하면 확장됩니다.**)
+<details>
+  <summary>Named Entity Recognition (NER) datasets</summary>
+
+#### Named Entity Recognition
+
+| Object | Languages | Description |
+| -------------    | ------------- |-------------  |
+| 'CONLL_03' | English  |  [CoNLL-03](https://www.clips.uantwerpen.be/conll2002/ner/) 4-class NER (requires manual download) |
+| 'CONLL_03_GERMAN' | German  |  [CoNLL-03](https://www.clips.uantwerpen.be/conll2002/ner/) 4-class NER (requires manual download) |
+| 'CONLL_03_DUTCH' | Dutch  |  [CoNLL-03](https://www.clips.uantwerpen.be/conll2002/ner/) 4-class NER |
+| 'CONLL_03_SPANISH' | Spanish  |  [CoNLL-03](https://www.clips.uantwerpen.be/conll2002/ner/) 4-class NER |
+| 'NER_ARABIC_ANER' | Arabic  |  [Arabic Named Entity Recognition Corpus](http://curtis.ml.cmu.edu/w/courses/index.php/ANERcorp) 4-class NER |
+| 'NER_ARABIC_AQMAR' | Arabic  |  [American and Qatari Modeling of Arabic](http://www.cs.cmu.edu/~ark/AQMAR/) 4-class NER (modified) |
+| 'NER_BASQUE' | Basque  |  [NER dataset for Basque](http://ixa2.si.ehu.eus/eiec/) |
+| 'NER_CHINESE_WEIBO' | Chinese  | [Weibo NER corpus](https://paperswithcode.com/sota/chinese-named-entity-recognition-on-weibo-ner/).  |
+| 'NER_DANISH_DANE' | Danish | [DaNE dataset](https://github.com/alexandrainst/danlp/blob/master/docs/datasets.md#danish-dependency-treebank) | 
+| 'NER_ENGLISH_MOVIE_SIMPLE' | English  |  [NER dataset for movie reviews](https://groups.csail.mit.edu/sls/downloads/movie/) - simple NER |
+| 'NER_ENGLISH_MOVIE_COMPLEX' | English  |  [NER dataset for movie reviews](https://groups.csail.mit.edu/sls/downloads/movie/) - complex NER |
+| 'NER_ENGLISH_PERSON' | English | [PERSON_NER](https://github.com/das-sudeshna/genid) NER with person names | 
+| 'NER_ENGLISH_RESTAURANT' | English  |  [NER dataset for restaurant reviews](https://groups.csail.mit.edu/sls/downloads/restaurant/) |
+| 'NER_ENGLISH_SEC_FILLINGS' | English | [SEC-fillings](https://github.com/juand-r/entity-recognition-datasets) with 4-class NER labels from [Alvarado et al, 2015](https://aclanthology.org/U15-1010/) | 
+| 'NER_ENGLISH_STACKOVERFLOW' | English  | NER on StackOverflow posts |
+| 'NER_ENGLISH_TWITTER' | English  |  [Twitter NER dataset](https://github.com/aritter/twitter_nlp/) |
+| 'NER_ENGLISH_WIKIGOLD' | English  |  [Wikigold](https://github.com/juand-r/entity-recognition-datasets/tree/master/data/wikigold) a manually annotated collection of Wikipedia text |
+| 'NER_ENGLISH_WNUT_2020' | English  |  [WNUT-20](https://github.com/jeniyat/WNUT_2020_NER) named entity extraction |
+| 'NER_ENGLISH_WEBPAGES' | English  | 4-class NER on web pages from [Ratinov and Roth (2009)](https://aclanthology.org/W09-1119/) |
+| 'NER_FINNISH' | Finnish | [Finer-data](https://github.com/mpsilfve/finer-data) | 
+| 'NER_GERMAN_BIOFID' | German  |  [BIOfid](https://www.aclweb.org/anthology/K19-1081/) Biodiversity literature NER |
+| 'NER_GERMAN_EUROPARL' | German | [German Europarl dataset](https://nlpado.de/~sebastian/software/ner_german.shtml) NER in German EU parliament speeches | 
+| 'NER_GERMAN_GERMEVAL' | German  |  [GermEval 14 NER](https://sites.google.com/site/germeval2014ner/data/) corpus |
+| 'NER_GERMAN_LEGAL' | German | [Legal Entity Recognition](https://github.com/elenanereiss/Legal-Entity-Recognition) NER in German Legal Documents |
+| 'NER_GERMAN_POLITICS' | German | [NEMGP](https://www.thomas-zastrow.de/nlp/) corpus |
+| 'NER_HUNGARIAN' | Hungarian | NER on Hungarian business news |
+| 'NER_ICELANDIC' | Icelandic | NER on Icelandic |
+| 'NER_JAPANESE' | Japanese | [Japanese NER](https://github.com/Hironsan/IOB2Corpus) dataset automatically generated from Wikipedia |
+| 'NER_MASAKHANE' | 10 languages | [MasakhaNER: Named Entity Recognition for African Languages](https://github.com/masakhane-io/masakhane-ner) corpora |
+| 'NER_SWEDISH' | Swedish | [Swedish Spraakbanken NER](https://github.com/klintan/swedish-ner-corpus/) 4-class NER |
+| 'NER_TURKU' | Finnish | [TURKU_NER](https://github.com/TurkuNLP/turku-ner-corpus) NER corpus created by the Turku NLP Group, University of Turku, Finland |
+| 'NER_MULTI_WIKIANN' | 282 languages  | Gigantic [corpus for cross-lingual NER derived from Wikipedia](https://elisa-ie.github.io/wikiann/).  |
+| 'NER_MULTI_WIKINER' | 8 languages | [WikiNER](https://github.com/dice-group/FOX/tree/master/input/Wikiner) NER dataset automatically generated from Wikipedia (English, German, French, Italian, Spanish, Portuguese, Polish, Russian) |
+| 'NER_MULTI_XTREME' | 176 languages  |  [Xtreme](https://github.com/google-research/xtreme) corpus by Google Research for cross-lingual NER consisting of datasets of a total of 176 languages |
+| 'WNUT_17' | English  |  [WNUT-17](https://noisy-text.github.io/2017/files/) emerging entity detection |
+
+</details>
+
+<details>
+  <summary>Biomedical Named Entity Recognition (BioNER) datasets</summary>
+
+#### Biomedical Named Entity Recognition
+
+We support 31 biomedical NER datasets, listed [here](HUNFLAIR_CORPORA.md).
+
+</details>
+
+<details>
+  <summary>Entity Linking (NEL) datasets</summary>
+
+#### Entity Linking
+| Object | Languages | Description |
+| -------------    | ------------- |-------------  |
+| 'NEL_ENGLISH_AIDA' | English  |  [AIDA CoNLL-YAGO Entity Linking corpus](https://www.mpi-inf.mpg.de/departments/databases-and-information-systems/research/ambiverse-nlu/aida/downloads) on the CoNLL-03 corpus |
+| 'NEL_ENGLISH_AQUAINT' | English  | Aquaint Entity Linking corpus introduced in [Milne and Witten (2008)](https://www.cms.waikato.ac.nz/~ihw/papers/08-DNM-IHW-LearningToLinkWithWikipedia.pdf) |
+| 'NEL_ENGLISH_IITB' | English  | IITB Entity Linking corpus introduced in [Sayali et al. (2009)](https://dl.acm.org/doi/10.1145/1557019.1557073) |
+| 'NEL_ENGLISH_REDDIT' | English  | Reddit Entity Linking corpus introduced in [Botzer et al. (2021)](https://arxiv.org/abs/2101.01228v2) (only gold annotations)|
+| 'NEL_ENGLISH_TWEEKI' | English  | Tweeki Entity Linking corpus introduced in [Harandizadeh and Singh (2020)](https://aclanthology.org/2020.wnut-1.29.pdf) |
+| 'NEL_GERMAN_HIPE' | German  | [HIPE](https://impresso.github.io/CLEF-HIPE-2020/) Entity Linking corpus for historical German as a [sentence-segmented version](https://github.com/stefan-it/clef-hipe) |
+
+</details>
+
+
+<details>
+  <summary>Relation Extraction (RE) datasets</summary>
+
+#### Relation Extraction
+| Object | Languages | Description |
+| -------------    | ------------- |------------- |
+| 'RE_ENGLISH_CONLL04' | English  |  [CoNLL-04](https://github.com/bekou/multihead_joint_entity_relation_extraction/tree/master/data/CoNLL04) Relation Extraction |
+| 'RE_ENGLISH_SEMEVAL2010' | English  |  [SemEval-2010 Task 8](https://aclanthology.org/S10-1006.pdf) on Multi-Way Classification of Semantic Relations Between Pairs of Nominals |
+| 'RE_ENGLISH_TACRED' | English  |  [TAC Relation Extraction Dataset](https://nlp.stanford.edu/projects/tacred/) with 41 relations (download required) |
+| 'RE_ENGLISH_DRUGPROT' | English  |  [DrugProt corpus: Biocreative VII Track 1](https://zenodo.org/record/5119892#.YSdSaVuxU5k/) - drug and chemical-protein interactions |
+
+</details>
+
+<details>
+  <summary>GLUE Benchmark datasets</summary>
+
+#### GLUE Benchmark
+| Object | Languages | Description |
+| -------------    | ------------- |------------- |
+| 'GLUE_COLA' | English | The Corpus of Linguistic Acceptability from GLUE benchmark |
+| 'GLUE_MNLI' | English | The Multi-Genre Natural Language Inference Corpus from the GLUE benchmark |
+| 'GLUE_RTE' | English | The RTE task from the GLUE benchmark |
+| 'GLUE_QNLI' | English | The Stanford Question Answering Dataset formatted as NLI task from the GLUE benchmark |
+| 'GLUE_WNLI' | English | The Winograd Schema Challenge formatted as NLI task from the GLUE benchmark |
+| 'GLUE_MRPC' | English | The MRPC task from GLUE benchmark |
+| 'GLUE_QQP' | English | The Quora Question Pairs dataset where the task is to determine whether a pair of questions are semantically equivalent |
+| 'SUPERGLUE_RTE' | English | The RTE task from the SuperGLUE benchmark |
+
+</details>
+
+<details>
+  <summary>Universal Proposition Banks (UP) datasets</summary>
+
+#### Universal Proposition Banks 
+
+We also support loading the [Universal Proposition Banks](https://github.com/System-T/UniversalPropositions)
+for the purpose of training multilingual frame detection systems. 
+
+| Object | Languages | Description |
+| -------------    | ------------- |------------- |
+| 'UP_CHINESE' | Chinese  |  Universal Propositions for [Chinese](https://github.com/System-T/UniversalPropositions/tree/master/UP_Chinese) |
+| 'UP_ENGLISH'| English  |  Universal Propositions for [English](https://github.com/System-T/UniversalPropositions/tree/master/UP_English-EWT) |
+| 'UP_FINNISH'| Finnish  |  Universal Propositions for [Finnish](https://github.com/System-T/UniversalPropositions/tree/master/UP_Finnish) |
+| 'UP_FRENCH'| French  |  Universal Propositions for [French](https://github.com/System-T/UniversalPropositions/tree/master/UP_French) |
+| 'UP_GERMAN'| German  |  Universal Propositions for [German](https://github.com/System-T/UniversalPropositions/tree/master/UP_German) |
+| 'UP_ITALIAN' | Italian  |  Universal Propositions for [Italian](https://github.com/System-T/UniversalPropositions/tree/master/UP_Italian) |
+| 'UP_SPANISH' | Spanish  |  Universal Propositions for [Spanish](https://github.com/System-T/UniversalPropositions/tree/master/UP_Spanish) |
+| 'UP_SPANISH_ANCORA' | Spanish (Ancora Corpus)  |  Universal Propositions for [Spanish](https://github.com/System-T/UniversalPropositions/tree/master/UP_Spanish-AnCora) |
+
+</details>
+
+<details>
+  <summary>Universal Dependency Treebanks (UD) datasets</summary>
+
+#### Universal Dependency Treebanks
+
+| Object | Languages | Description |
+| -------------    | ------------- |------------- |
+| 'UD_ARABIC'| Arabic  |  Universal Dependency Treebank for [Arabic](https://github.com/UniversalDependencies/UD_Arabic-PADT) |
+| 'UD_BASQUE'| Basque  |  Universal Dependency Treebank for [Basque](https://github.com/UniversalDependencies/UD_Basque-BDT) |
+| 'UD_BULGARIAN'| Bulgarian  |  Universal Dependency Treebank for [Bulgarian](https://github.com/UniversalDependencies/UD_Bulgarian-BTB) |
+| 'UD_CATALAN' | Catalan  |  Universal Dependency Treebank for [Catalan](https://github.com/UniversalDependencies/UD_Catalan-AnCora) |
+| 'UD_CHINESE' | Chinese  |  Universal Dependency Treebank for [Chinese](https://github.com/UniversalDependencies/UD_Chinese-GSD) |
+| 'UD_CHINESE_KYOTO' | Classical Chinese  |  Universal Dependency Treebank for Classical [Chinese](https://github.com/UniversalDependencies/UD_Classical_Chinese-Kyoto/tree/dev) |
+| 'UD_CROATIAN' | Croatian  |  Universal Dependency Treebank for [Croatian](https://github.com/UniversalDependencies/UD_Croatian-SET) |
+| 'UD_CZECH' | Czech  |  Very large Universal Dependency Treebank for [Czech](https://github.com/UniversalDependencies/UD_Czech-PDT) |
+| 'UD_DANISH' | Danish  |  Universal Dependency Treebank for [Danish](https://github.com/UniversalDependencies/UD_Danish-DDT) |
+| 'UD_DUTCH' | Dutch  |  Universal Dependency Treebank for [Dutch](https://github.com/UniversalDependencies/UD_Dutch-Alpino) |
+| 'UD_ENGLISH' | English  |  Universal Dependency Treebank for [English](https://github.com/UniversalDependencies/UD_English-EWT) |
+| 'UD_FINNISH' | Finnish  |  Universal Dependency Treebank for [Finnish](https://github.com/UniversalDependencies/UD_Finnish-TDT) |
+| 'UD_FRENCH' | French  |  Universal Dependency Treebank for [French](https://github.com/UniversalDependencies/UD_French-GSD) |
+|'UD_GERMAN' | German  |  Universal Dependency Treebank for [German](https://github.com/UniversalDependencies/UD_German-GSD) |
+|'UD_GERMAN_HDT' | German  |  Very large Universal Dependency Treebank for [German](https://github.com/UniversalDependencies/UD_German-HDT) |
+|'UD_HEBREW' | Hebrew  |  Universal Dependency Treebank for [Hebrew](https://github.com/UniversalDependencies/UD_Hebrew-HTB) |
+|'UD_HINDI' | Hindi  |  Universal Dependency Treebank for [Hindi](https://github.com/UniversalDependencies/UD_Hindi-HDTB) |
+|'UD_INDONESIAN' | Indonesian  |  Universal Dependency Treebank for [Indonesian](https://github.com/UniversalDependencies/UD_Indonesian-GSD) |
+| 'UD_ITALIAN' | Italian  |  Universal Dependency Treebank for [Italian](https://github.com/UniversalDependencies/UD_Italian-ISDT) |
+| 'UD_JAPANESE'| Japanese  |  Universal Dependency Treebank for [Japanese](https://github.com/UniversalDependencies/UD_Japanese-GSD) |
+|'UD_KOREAN' | Korean  |  Universal Dependency Treebank for [Korean](https://github.com/UniversalDependencies/UD_Korean-Kaist) |
+| 'UD_NORWEGIAN'  | Norwegian  |  Universal Dependency Treebank for [Norwegian](https://github.com/UniversalDependencies/UD_Norwegian-Bokmaal) |
+|  'UD_PERSIAN' | Persian / Farsi  |  Universal Dependency Treebank for [Persian](https://github.com/UniversalDependencies/UD_Persian-Seraji) |
+| 'UD_POLISH'  |  Polish |  Universal Dependency Treebank for [Polish](https://github.com/UniversalDependencies/UD_Polish-LFG) |
+|'UD_PORTUGUESE' | Portuguese  |  Universal Dependency Treebank for [Portuguese](https://github.com/UniversalDependencies/UD_Portuguese-Bosque) |
+| 'UD_ROMANIAN' | Romanian  |  Universal Dependency Treebank for [Romanian](https://github.com/UniversalDependencies/UD_Romanian-RRT)  |
+| 'UD_RUSSIAN' | Russian  |  Universal Dependency Treebank for [Russian](https://github.com/UniversalDependencies/UD_Russian-SynTagRus) |
+| 'UD_SERBIAN' | Serbian  |  Universal Dependency Treebank for [Serbian](https://github.com/UniversalDependencies/UD_Serbian-SET)|
+| 'UD_SLOVAK' | Slovak  |  Universal Dependency Treebank for [Slovak](https://github.com/UniversalDependencies/UD_Slovak-SNK) |
+| 'UD_SLOVENIAN' | Slovenian  |  Universal Dependency Treebank for [Slovenian](https://github.com/UniversalDependencies/UD_Slovenian-SSJ) |
+| 'UD_SPANISH'  | Spanish  |  Universal Dependency Treebank for [Spanish](https://github.com/UniversalDependencies/UD_Spanish-GSD) |
+|  'UD_SWEDISH' | Swedish  |  Universal Dependency Treebank for [Swedish](https://github.com/UniversalDependencies/UD_Swedish-Talbanken) |
+|  'UD_TURKISH' | Turkish  |  Universal Dependency Treebank for [Turkish](https://github.com/UniversalDependencies/UD_Turkish-IMST) |
+
+</details>
+
+<details>
+  <summary>Text Classification datasets</summary>
+
+#### Text Classification
+| Object | Languages | Description |
+| -------------    | ------------- |------------- |
+| 'AMAZON_REVIEWS' | English |  [Amazon product reviews](https://nijianmo.github.io/amazon/index.html/) dataset with sentiment annotation |
+| 'COMMUNICATIVE_FUNCTIONS' | English |  [Communicative functions](https://github.com/Alab-NII/FECFevalDataset) of sentences in scholarly papers |
+| 'GERMEVAL_2018_OFFENSIVE_LANGUAGE' | German | Offensive language detection for German |
+| 'GO_EMOTIONS' | English | [GoEmotions dataset](https://github.com/google-research/google-research/tree/master/goemotions) Reddit comments labeled with 27 emotions |
+| 'IMDB' | English |  [IMDB](http://ai.stanford.edu/~amaas/data/sentiment/) dataset of movie reviews with sentiment annotation  |
+| 'NEWSGROUPS' | English | The popular [20 newsgroups](http://qwone.com/~jason/20Newsgroups/) classification dataset |
+| 'YAHOO_ANSWERS' | English | The [10 largest main categories](https://course.fast.ai/datasets#nlp) from the Yahoo! Answers |
+| 'SENTIMENT_140' | English | [Tweets dataset](http://help.sentiment140.com/for-students/) with sentiment annotation |
+| 'SENTEVAL_CR' | English | Customer reviews dataset of [SentEval](https://github.com/facebookresearch/SentEval) with sentiment annotation |
+| 'SENTEVAL_MR' | English | Movie reviews dataset of [SentEval](https://github.com/facebookresearch/SentEval) with sentiment annotation |
+| 'SENTEVAL_SUBJ' | English | Subjectivity dataset of [SentEval](https://github.com/facebookresearch/SentEval) |
+| 'SENTEVAL_MPQA' | English | Opinion-polarity dataset of [SentEval](https://github.com/facebookresearch/SentEval) with opinion-polarity annotation |
+| 'SENTEVAL_SST_BINARY' | English | Stanford sentiment treebank dataset of [SentEval](https://github.com/facebookresearch/SentEval) with sentiment annotation |
+| 'SENTEVAL_SST_GRANULAR' | English | Stanford sentiment treebank dataset of [SentEval](https://github.com/facebookresearch/SentEval) with fine-grained sentiment annotation |
+| 'TREC_6', 'TREC_50' | English | The [TREC](http://cogcomp.org/Data/QA/QC/) question classification dataset |
+
+</details>
+
+<details>
+  <summary>Text Regression datasets</summary>
+
+#### Text Regression
+| Object | Languages | Description |
+| -------------    | ------------- |------------- |
+| 'WASSA_ANGER' | English | The [WASSA](https://competitions.codalab.org/competitions/16380#learn_the_details) emotion-intensity detection challenge (anger) |
+| 'WASSA_FEAR' | English | The [WASSA](https://competitions.codalab.org/competitions/16380#learn_the_details) emotion-intensity detection challenge (fear) |
+| 'WASSA_JOY' | English | The [WASSA](https://competitions.codalab.org/competitions/16380#learn_the_details) emotion-intensity detection challenge (joy) |
+| 'WASSA_SADNESS' | English | The [WASSA](https://competitions.codalab.org/competitions/16380#learn_the_details) emotion-intensity detection challenge (sadness) |
+
+</details>
+
+<details>
+  <summary>Other Sequence Labeling datasets</summary>
+
+#### Other Sequence Labeling
+
+| Object | Languages | Description |
+| -------------    | ------------- |------------- |
+| 'CONLL_2000' | English  | Syntactic chunking with [CoNLL-2000](https://www.clips.uantwerpen.be/conll2000/chunking/)  |
+| 'BIOSCOPE' | English  | Negation and speculation scoping with [BioScope](https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-9-S11-S9/) biomedical texts annotated for uncertainty, negation and their scopes |
+| 'KEYPHRASE_INSPEC' | English | Keyphrase detection with [INSPEC](https://www.aclweb.org/anthology/W03-1028) original corpus (2000 docs) from INSPEC database, adapted by [midas-research](https://arxiv.org/abs/1910.08840) |
+| 'KEYPHRASE_SEMEVAL2017' | English | Keyphrase detection with [SEMEVAL2017](https://arxiv.org/abs/1704.02853) dataset (500 docs) from ScienceDirect, adapted by [midas-research](https://arxiv.org/abs/1910.08840) |
+| 'KEYPHRASE_SEMEVAL2010' | English | Keyphrase detection with [SEMEVAL2010](https://www.aclweb.org/anthology/S10-1004/) dataset (~250 docs) from ACM Digital Library, adapted by [midas-research](https://arxiv.org/abs/1910.08840) |
+
+</details>
+
+<details>
+  <summary>Similarity Learning datasets</summary>
+
+#### Experimental: Similarity Learning
+| Object | Languages | Description |
+| -------------    | ------------- |------------- |
+| 'FeideggerCorpus' | German |  [Feidegger](https://github.com/zalandoresearch/feidegger/) dataset fashion images and German-language descriptions  |
+| 'OpusParallelCorpus' | Any language pair | Parallel corpora of the [OPUS](http://opus.nlpl.eu/) project, currently supports only Tatoeba corpus |
+
+</details>
+
+만약 감정 텍스트 분류를 위해 IMDB를 로드하고자 한다면 아래의 코드를 참조해주세요 :
+```python
+import flair.datasets
+corpus = flair.datasets.IMDB()
+```
+위 코드를 통해 모델 훈련을 위한 다운로드와 설정이 완료됩니다.
+
+## 고유한 Sequence Labeling Dataset 읽기
+만약 위의 리스트에 없는 Sequence Labeling Dataset에 대해 학습을 원한다면 `ColumnCorpus` 오브젝트로 불러올 수 있습니다. NLP에서 대부분의 Sequence Labeling Dataset은 각 행이 단어이며 각 열은 언어 주석인 형태를 가지고 있습니다. 다음 문장을 보시면 :
+```console
+George N B-PER
+Washington N I-PER
+went V O
+to P O
+Washington N B-LOC
+
+Sam N B-PER
+Houston N I-PER
+stayed V O
+home N O
+```
+
+첫 번째 열은 단어, 두 번째 열은 단순한 POS 태그, 세 번째는 BIO-NER tag입니다. 빈 줄은 문장의 구분을 나타냅니다. 이러한 데이터셋을 읽기 위해서 열 구조를 사전으로 정의하고 `ColumnCorpus`를 인스턴스화하면 됩니다.
+```python
+from flair.data import Corpus
+from flair.datasets import ColumnCorpus
+
+# 열을 정의합니다.
+columns = {0: 'text', 1: 'pos', 2: 'ner'}
+
+# 훈련, 테스트, 개발 파일이 있는 폴더입니다.
+data_folder = '/path/to/data/folder'
+
+# 열의 형식, 데이터 폴더, 훈련, 테스트, 개발 파일을 사용해 말뭉치를 초기화합니다.
+corpus: Corpus = ColumnCorpus(data_folder, columns,
+                              train_file='train.txt',
+                              test_file='test.txt',
+                              dev_file='dev.txt')
+
+```
+위 코드를 통해 훈련, 개발, 테스트를 위해 분리된 `Sentence`의 리스트를 가지고 있는 말뭉치 오브젝트를 만들었습니다. 아래 코드를 통해 훈련 split에 몇개의 문장이 있는지 확인할 수 있습니다 :
+```python
+len(corpus.train)
+```
+또한 문장에 접근해 주석을 확인할 수 있습니다 :
+```python
+print(corpus.train[0].to_tagged_string('ner'))
+print(corpus.train[1].to_tagged_string('pos'))
+```
+이는 아래와 같은 결과를 보여줍니다 : 
+```console
+George <B-PER> Washington <I-PER> went to Washington <B-LOC>
+
+Sam <N> Houston <N> stayed <V> home <N>
+```
+
+## Text Classification dataset 
+자신의 text Classification dataset을 사용하는 두 가지 방법이 있습니다.
+텍스트와 레이블을 simple CSV 파일 혹은 [FastText 형식](https://fasttext.cc/docs/en/supervised-tutorial.html)으로 데이터를 포맷하면 됩니다.
+
+#### simple CSV file로 로드하기
+많은 text classification dataset은 simple CSV 파일로 배포됩니다. simple CSV 파일은 각 행이 데이터 포인트에 해당하고 열이 텍스트, 레이블, 기타 메타 데이터인 형식을 가지고 있습니다. `CSVClassificationCorpus`에 위에서 본 `ColumnCorpus`같은 열 형식을 전달하는 것으로 CSV 포맷의 classification dataset을 로드할 수 있습니다. 열 형식은 CSV에서 텍스트를 보관하는 열과 레이블을 보관하는 영역을 나타냅니다. 파이썬 CSV 라이브러리는 Excel CSV 포맷을 기본으로 하고있지만 추가적인 파라미터를 통해 [사용자 지정 구분 문자](https://docs.python.org/3/library/csv.html#csv-fmt-params) 혹은 따옴표를 사용할 수 있습니다.
+참고 : 말뭉치 initializer는 자동으로 훈련, 개발, 테스트 split을 폴더에서 검색하기 때문에 분할 CSV 파일들은 각 이름이 적절하게 지정되어야 합니다.(예: `train.csv` `test.csv` `dev.csv`)
+```python
+from flair.data import Corpus
+from flair.datasets import CSVClassificationCorpus
+
+# 훈련, 테스트, 개발 파일이 있는 폴더의 경로입니다.
+data_folder = '/path/to/data'
+
+# 열 형식은 텍스트와 레이블을 포함하는 형식입니다.
+column_name_map = {4: "text", 1: "label_topic", 2: "label_subtopic"}
+
+# 훈련, 테스트, 개발 데이터가 포함된 말뭉치를 로드합니다. 만약 CSV 헤더가 있다면 스킵합니다.
+corpus: Corpus = CSVClassificationCorpus(data_folder,
+                                         column_name_map,
+                                         skip_header=True,
+                                         delimiter='\t',    # tab-separated files
+) 
+```
+
+#### FastText 형식으로 로드하기
+`CSVClassificationCorpus`가 적합하지 않은 경우 파일의 각 줄이 하나의 텍스트 문서를 나타내는 FastText 형식을 사용할 수 있습니다.
+문서에는 접두사 `__label__`로 시작하는 한개 이상의 레이블이 있을 수 있습니다. 아래를 참고해주세요 :
+```bash
+__label__<label_1> <text>
+__label__<label_1> __label__<label_2> <text>
+```
+앞에서 언급한 바와 같이 text classification을 하기 위한 `Corpus`를 만들기 위해선 훈련, 개발, 테스트 세 개의 파일이 필요합니다.
+예를 들어 IMDB 작업의 경우 폴더 구조는 다음과 같습니다 : 
+```text
+/resources/tasks/imdb/train.txt
+/resources/tasks/imdb/dev.txt
+/resources/tasks/imdb/test.txt
+```
+`/resources/tasks/imdb`를 선택해 `ClassificationCorpus`를 만듭니다.
+파일의 각 줄은 레이블 주석이 있는 `Sentence` 오브젝트로 변환됩니다.
+
+주의 : 한 줄의 텍스트는 여러 문장을 포함하고 있을 수 있기 때문에 `Sentence` 오브젝트는 여러개의 문장으로 구성될 수 있습니다.
+```python
+from flair.data import Corpus
+from flair.datasets import ClassificationCorpus
+
+# 훈련, 테스트, 개발 파일이 있는 폴더의 경로입니다.
+data_folder = '/path/to/data/folder'
+
+# 훈련, 테스트, 개발 데이터가 포함된 말뭉치를 로드합니다.
+corpus: Corpus = ClassificationCorpus(data_folder,
+                                      test_file='test.txt',
+                                      dev_file='dev.txt',
+                                      train_file='train.txt',                                       
+                                      label_type='topic',
+                                      )
+```
+대부분의 경우 말뭉치 initializer는 자동으로 폴더의 훈련, 개발, 테스트 split을 찾습니다. 그렇기 때문에 파일 이름을 직접 지정할 필요가 없습니다. 이 정도면 충분합니다.
+```python
+# 훈련, 테스트, 개발 파일이 있는 폴더의 경로입니다.
+data_folder = '/path/to/data/folder'
+
+# 폴더에서 자동으로 훈련, 개발, 테스트 split을 식별합니다. 
+corpus: Corpus = ClassificationCorpus(data_folder,                                                                            
+                                      label_type='topic',
+                                      )
+```
+`FastText` 형식은 열이 없기 때문에 주석의 이름을 직접 정의해야 합니다. 위 예제는 `label_type='topic'`인 말뭉치를 로드하고 있음을 나타냅니다.
+
+## 다음 튜토리얼
+이제 [나만의 모델을 훈련](/resources/docs/TUTORIAL_7_TRAINING_A_MODEL.md)하는 방법을 알아보겠습니다.

From 42cf7ea0dc351db91d040c1ab1ce97998c31d7a3 Mon Sep 17 00:00:00 2001
From: Taehyuny <31824443+Taehyuny@users.noreply.github.com>
Date: Sat, 13 Nov 2021 13:35:45 +0900
Subject: [PATCH 07/30] Create TUTORIAL_7_TRAINING_A_MODEL.md

---
 .../KOR_docs/TUTORIAL_7_TRAINING_A_MODEL.md   | 383 ++++++++++++++++++
 1 file changed, 383 insertions(+)
 create mode 100644 resources/docs/KOR_docs/TUTORIAL_7_TRAINING_A_MODEL.md

diff --git a/resources/docs/KOR_docs/TUTORIAL_7_TRAINING_A_MODEL.md b/resources/docs/KOR_docs/TUTORIAL_7_TRAINING_A_MODEL.md
new file mode 100644
index 000000000..8e89bd524
--- /dev/null
+++ b/resources/docs/KOR_docs/TUTORIAL_7_TRAINING_A_MODEL.md
@@ -0,0 +1,383 @@
+# Tutorial 7: Training a Model
+
+튜토리얼 7에서는 최첨단 word embedding을 사용하여 여러분의 시퀀스 레이블링(sequence labeling)과 텍스트 분류(text classification) 모델을   
+훈련하는 방법을 살펴볼 것입니다.
+
+이 튜토리얼을 학습하기 전에, 다음의 항목들을 이미 알고있다고 가정할 것입니다.
+* Base types: [TUTORIAL_1_BASICS](/resources/docs/TUTORIAL_1_BASICS.md)
+* Word embeddings: [TUTORIAL_3_WORD_EMBEDDING](/resources/docs/TUTORIAL_3_WORD_EMBEDDING.md)
+* Flair embeddings: [TUTORIAL_4_ELMO_BERT_FLAIR_EMBEDDING](/resources/docs/TUTORIAL_4_ELMO_BERT_FLAIR_EMBEDDING.md)
+* Load a corpus: [TUTORIAL_6_CORPUS](/resources/docs/TUTORIAL_6_CORPUS.md)
+
+
+
+## 품사 Tagging 모델 훈련
+
+다음 예제는 간단한 글로브(Glove) 임베딩을 이용하여 UD_ENGLISH (English universal dependency treebank) 데이터를 통해 훈련된 작은 품사 tagger   
+모델에 대한 코드입니다.
+이 예제에서는 더 빠르게 작동시키기 위해 기존 데이터의 10%로 다운샘플링하여 진행했지만, 보통의 경우에는 전체 데이터셋으로 훈련   
+시켜야 합니다:
+
+```python
+from flair.datasets import UD_ENGLISH
+from flair.embeddings import WordEmbeddings, StackedEmbeddings
+from flair.models import SequenceTagger
+from flair.trainers import ModelTrainer
+# 1. get the corpus
+corpus = UD_ENGLISH().downsample(0.1)
+print(corpus)
+# 2. what label do we want to predict?
+label_type = 'upos'
+# 3. make the label dictionary from the corpus
+label_dict = corpus.make_label_dictionary(label_type=label_type)
+print(label_dict)
+# 4. initialize embeddings
+embedding_types = [
+    WordEmbeddings('glove'),
+    # comment in this line to use character embeddings
+    # CharacterEmbeddings(),
+    # comment in these lines to use flair embeddings
+    # FlairEmbeddings('news-forward'),
+    # FlairEmbeddings('news-backward'),
+]
+embeddings = StackedEmbeddings(embeddings=embedding_types)
+# 5. initialize sequence tagger
+tagger = SequenceTagger(hidden_size=256,
+                        embeddings=embeddings,
+                        tag_dictionary=label_dict,
+                        tag_type=label_type,
+                        use_crf=True)
+# 6. initialize trainer
+trainer = ModelTrainer(tagger, corpus)
+# 7. start training
+trainer.train('resources/taggers/example-upos',
+              learning_rate=0.1,
+              mini_batch_size=32,
+              max_epochs=10)
+```
+
+
+또는 전체 데이터에 대해 FlairEmbeddings 및 GloVe와 함께 누적된 임베딩을 150 epochs 만큼 (전체 데이터를 150번 훈련) 사용해 보세요.   
+그렇게 하면 [Akbik et al. (2018)](https://aclanthology.org/C18-1139.pdf)에 보고된 최신 정확도를 얻을 수 있습니다.
+
+모델이 학습되면 이를 사용하여 새 문장의 태그를 예측할 수 있습니다. 모델의 'predict' 메서드를 호출하기만 하면 됩니다.
+
+```python
+# load the model you trained
+model = SequenceTagger.load('resources/taggers/example-upos/final-model.pt')
+# create example sentence
+sentence = Sentence('I love Berlin')
+# predict tags and print
+model.predict(sentence)
+print(sentence.to_tagged_string())
+```
+
+모델이 잘 작동한다면 이 예에서 동사로 'love'를 올바르게 태그할 것입니다.
+
+## Flair Embedding으로 개체명 인식 (NER) 모델 훈련하기
+
+
+NER에 대한 시퀀스 레이블링 모델을 훈련하려면 위의 스크립트를 약간만 수정하면 됩니다.   
+CONLL_03(데이터를 수동으로 다운로드하거나 [different NER corpus](/docs/TUTORIAL_6_CORPUS.md#datasets-included-in-flair) 사용)과 같은 NER corpus를 로드하고,  
+`label_type`을 'ner'로 변경한 후, GloVe 및 Flair로 구성된 `StackedEmbeddings`를 사용하세요:
+
+```python
+from flair.datasets import CONLL_03
+from flair.embeddings import WordEmbeddings, FlairEmbeddings, StackedEmbeddings
+from flair.models import SequenceTagger
+from flair.trainers import ModelTrainer
+# 1. get the corpus
+corpus = CONLL_03()
+print(corpus)
+# 2. what label do we want to predict?
+label_type = 'ner'
+# 3. make the label dictionary from the corpus
+label_dict = corpus.make_label_dictionary(label_type=label_type)
+print(label_dict)
+# 4. initialize embedding stack with Flair and GloVe
+embedding_types = [
+    WordEmbeddings('glove'),
+    FlairEmbeddings('news-forward'),
+    FlairEmbeddings('news-backward'),
+]
+embeddings = StackedEmbeddings(embeddings=embedding_types)
+# 5. initialize sequence tagger
+tagger = SequenceTagger(hidden_size=256,
+                        embeddings=embeddings,
+                        tag_dictionary=label_dict,
+                        tag_type=label_type,
+                        use_crf=True)
+# 6. initialize trainer
+trainer = ModelTrainer(tagger, corpus)
+# 7. start training
+trainer.train('resources/taggers/sota-ner-flair',
+              learning_rate=0.1,
+              mini_batch_size=32,
+              max_epochs=150)
+```
+
+
+그렇게 하면 [Akbik et al. (2018)](https://aclanthology.org/C18-1139.pdf)에 보고된 것과 유사한 최신(state-of-the-art) 수치를 얻을 수 있습니다.
+
+## 변환기를 사용하여 개체명 인식 (NER) 모델 훈련하기
+
+임베딩으로 변환기를 사용하고 미세 조정하고 전체 문서 컨텍스트를 사용하면 **훨씬 더 나은 수치**를 얻을 수 있습니다. (자세한 내용은 [FLERT](https://arxiv.org/abs/2011.06993) 문서 참조)   
+이는 최신식이지만 위의 모델보다 훨씬 느립니다.
+
+변환기 임베딩을 사용하도록 스크립트를 변경하고 SGD 대신 AdamW optimizer 및 작은 학습률로 미세 조정하도록 훈련 루틴을 변경하세요:
+
+```python
+from flair.datasets import CONLL_03
+from flair.embeddings import TransformerWordEmbeddings
+from flair.models import SequenceTagger
+from flair.trainers import ModelTrainer
+import torch
+from torch.optim.lr_scheduler import OneCycleLR
+# 1. get the corpus
+corpus = CONLL_03()
+print(corpus)
+# 2. what label do we want to predict?
+label_type = 'ner'
+# 3. make the label dictionary from the corpus
+label_dict = corpus.make_label_dictionary(label_type=label_type)
+print(label_dict)
+# 4. initialize fine-tuneable transformer embeddings WITH document context
+embeddings = TransformerWordEmbeddings(
+    model='xlm-roberta-large',
+    layers="-1",
+    subtoken_pooling="first",
+    fine_tune=True,
+    use_context=True,
+)
+# 5. initialize bare-bones sequence tagger (no CRF, no RNN, no reprojection)
+tagger = SequenceTagger(
+    hidden_size=256,
+    embeddings=embeddings,
+    tag_dictionary=label_dict,
+    tag_type='ner',
+    use_crf=False,
+    use_rnn=False,
+    reproject_embeddings=False,
+)
+# 6. initialize trainer with AdamW optimizer
+trainer = ModelTrainer(tagger, corpus, optimizer=torch.optim.AdamW)
+# 7. run training with XLM parameters (20 epochs, small LR, one-cycle learning rate scheduling)
+trainer.train('resources/taggers/sota-ner-flert',
+              learning_rate=5.0e-6,
+              mini_batch_size=4,
+              mini_batch_chunk_size=1,  # remove this parameter to speed up computation if you have a big GPU
+              max_epochs=20,  # 10 is also good
+              scheduler=OneCycleLR,
+              embeddings_storage_mode='none',
+              weight_decay=0.,
+              )
+```
+
+이는 [Schweter and Akbik (2021)](https://arxiv.org/abs/2011.06993)에 보고된 최근 수치와 비슷하게 나올 것입니다.
+
+## 텍스트 분류 모델 훈련하기
+
+다른 유형의 모델을 훈련시키는 것은 위의 시퀀스 레이블러를 교육하기 위한 스크립트와 매우 유사합니다. 텍스트 분류의 경우 적절한 말뭉치를 사용하고   
+word-level 임베딩 대신 document-level 임베딩을 사용하세요. (차이점은 이 둘에 대한 튜토리얼을 참조하세요.) 나머지는 이전과 동일합니다!
+
+텍스트 분류에서 가장 좋은 결과는 아래 코드와 같이 `TransformerDocumentEmbeddings`와 함께 미세 조정된 변환기를 사용합니다:
+
+(변환기를 미세 조정할 수 있는 큰 GPU가 없는 경우 대신 `DocumentPoolEmbeddings` 또는 `DocumentRNNEmbeddings`를 사용해 보세요.   
+가끔 제대로 작동하기도 합니다!)
+
+```python
+import torch
+from torch.optim.lr_scheduler import OneCycleLR
+from flair.data import Corpus
+from flair.datasets import TREC_6
+from flair.embeddings import TransformerDocumentEmbeddings
+from flair.models import TextClassifier
+from flair.trainers import ModelTrainer
+# 1. get the corpus
+corpus: Corpus = TREC_6()
+# 2. what label do we want to predict?
+label_type = 'question_class'
+# 3. create the label dictionary
+label_dict = corpus.make_label_dictionary(label_type=label_type)
+# 4. initialize transformer document embeddings (many models are available)
+document_embeddings = TransformerDocumentEmbeddings('distilbert-base-uncased', fine_tune=True)
+# 5. create the text classifier
+classifier = TextClassifier(document_embeddings, label_dictionary=label_dict, label_type=label_type)
+# 6. initialize trainer with AdamW optimizer
+trainer = ModelTrainer(classifier, corpus, optimizer=torch.optim.AdamW)
+# 7. run training with fine-tuning
+trainer.train('resources/taggers/question-classification-with-transformer',
+              learning_rate=5.0e-5,
+              mini_batch_size=4,
+              max_epochs=10,
+              scheduler=OneCycleLR,
+              embeddings_storage_mode='none',
+              weight_decay=0.,
+              )
+```
+
+모델이 학습되면 이것을 로드하여 새로운 문장의 클래스를 예측할 수 있습니다. 모델의 'predict' 메서드를 호출하기만 하면 됩니다.
+
+```python
+classifier = TextClassifier.load('resources/taggers/question-classification-with-transformer/final-model.pt')
+# create example sentence
+sentence = Sentence('Who built the Eiffel Tower ?')
+# predict class and print
+classifier.predict(sentence)
+print(sentence.labels)
+```
+
+## 멀티 데이터셋 훈련하기
+
+이제 영어와 독일어로 텍스트에 PoS 태그를 지정할 수 있는 단일 모델을 훈련해 보겠습니다. 이를 위해 영어 및 독일어 UD 말뭉치를 로드하고 멀티 말뭉치 개체를 만듭니다. 이 작업을 위해 새로운 다국어 Flair 임베딩을 사용할 것입니다. 나머지는 모두 이전과 동일합니다.   
+e.g.: 
+
+```python
+from flair.data import MultiCorpus
+from flair.datasets import UD_ENGLISH, UD_GERMAN
+from flair.embeddings import FlairEmbeddings, StackedEmbeddings
+from flair.models import SequenceTagger
+from flair.trainers import ModelTrainer
+# 1. get the corpora - English and German UD
+corpus = MultiCorpus([UD_ENGLISH(), UD_GERMAN()]).downsample(0.1)
+# 2. what label do we want to predict?
+label_type = 'upos'
+# 3. make the label dictionary from the corpus
+label_dict = corpus.make_label_dictionary(label_type=label_type)
+print(label_dict)
+# 4. initialize embeddings
+embedding_types = [
+    # we use multilingual Flair embeddings in this task
+    FlairEmbeddings('multi-forward'),
+    FlairEmbeddings('multi-backward'),
+]
+embeddings = StackedEmbeddings(embeddings=embedding_types)
+# 5. initialize sequence tagger
+tagger = SequenceTagger(hidden_size=256,
+                        embeddings=embeddings,
+                        tag_dictionary=label_dict,
+                        tag_type=label_type,
+                        use_crf=True)
+# 6. initialize trainer
+trainer = ModelTrainer(tagger, corpus)
+# 7. start training
+trainer.train('resources/taggers/example-universal-pos',
+              learning_rate=0.1,
+              mini_batch_size=32,
+              max_epochs=150,
+              )
+```
+
+이는 다국어 모델을 제공합니다. 더 많은 언어로 실험해 보세요!
+
+## 훈련 곡선 및 가중치 Plotting
+
+Flair에는 신경망에서 훈련 곡선과 가중치를 표시하는 도우미 메서드가 포함되어 있습니다. `ModelTrainer`는 결과 폴더에 `loss.tsv`를 자동으로   
+생성합니다. 훈련 중에 `write_weights=True`로 설정하면 `weights.txt` 파일도 생성됩니다.
+
+훈련 후 plotter가 다음 파일을 가리킬 것입니다:
+
+```python
+# set write_weights to True to write weights
+trainer.train('resources/taggers/example-universal-pos',
+              ...
+              write_weights=True,
+              ...
+              )
+# visualize
+from flair.visual.training_curves import Plotter
+plotter = Plotter()
+plotter.plot_training_curves('loss.tsv')
+plotter.plot_weights('weights.txt')
+```
+
+결과 폴더에 PNG 플롯이 생성될 것입니다.
+
+## 훈련 재개
+
+만약 특정 시점에서 훈련을 중지하고 나중에 다시 시작하려면 'checkpoint' 매개변수를 'True'로 설정하여 학습해야 합니다. 그렇게 하면 매 epoch 후에   
+모델과 훈련 매개변수를 저장할 것입니다. 
+따라서 나중에 언제든지 모델과 트레이너를 로드하여 중단한 지점에서 정확히 훈련을 이어갈 수 있습니다.
+
+아래 예제 코드는 `SequenceTagger`의 훈련, 중지 및 계속 훈련 방법을 보여줍니다. 'TextClassifier'의 경우도 마찬가지입니다. 
+
+```python
+from flair.data import Corpus
+from flair.datasets import WNUT_17
+from flair.embeddings import TokenEmbeddings, WordEmbeddings, StackedEmbeddings
+from typing import List
+from flair.models import SequenceTagger
+from flair.trainers import ModelTrainer
+# 1. get the corpus
+corpus: Corpus = WNUT_17().downsample(0.1)
+# 2. what label do we want to predict?
+label_type = 'ner'
+# 3. make the label dictionary from the corpus
+label_dict = corpus.make_label_dictionary(label_type=label_type)
+# 4. initialize embeddings
+embedding_types: List[TokenEmbeddings] = [
+    WordEmbeddings('glove')
+]
+embeddings: StackedEmbeddings = StackedEmbeddings(embeddings=embedding_types)
+# 5. initialize sequence tagger
+tagger: SequenceTagger = SequenceTagger(hidden_size=256,
+                                        embeddings=embeddings,
+                                        tag_dictionary=label_dict,
+                                        tag_type=label_type,
+                                        use_crf=True)
+# 6. initialize trainer
+trainer: ModelTrainer = ModelTrainer(tagger, corpus)
+# 7. start training
+trainer.train('resources/taggers/example-ner',
+              learning_rate=0.1,
+              mini_batch_size=32,
+              max_epochs=10,
+              checkpoint=True)
+# 8. stop training at any point
+# 9. continue trainer at later point
+checkpoint = 'resources/taggers/example-ner/checkpoint.pt'
+trainer = ModelTrainer.load_checkpoint(checkpoint, corpus)
+trainer.train('resources/taggers/example-ner',
+              learning_rate=0.1,
+              mini_batch_size=32,
+              max_epochs=150,
+              checkpoint=True)
+```
+
+## Scalability: 대규모 데이터셋으로 훈련하기
+
+Flair의 많은 임베딩은 런타임 측면에서 생성하는 데 다소 비용이 많이 들고 큰 벡터를 가질 수 있습니다. 이에 대한 예는 Flair 및 Transformer 기반 임베딩입니다. 설정에 따라 훈련 시간을 최적화하는 옵션을 설정할 수 있습니다.
+
+### Mini-Batch 크기 설정
+
+가장 중요한 것은 `mini_batch_size`입니다. GPU가 속도 향상을 위해 처리할 수 있는 경우 더 높은 값으로 설정하세요.   
+그러나 데이터 세트가 매우 작은 경우 너무 높게 설정하지 마세요. 그렇지 않으면 Epoch당 학습 단계가 충분하지 않을 것입니다.
+
+유사한 매개변수는 `mini_batch_chunk_size`입니다. 이 매개변수는 미니 배치를 청크로 더 분할하여 속도를 늦추지만 GPU 메모리 효율성을   
+향상시킵니다. 표준은 이것을 None으로 설정하는 것입니다 - GPU가 원하는 미니 배치 크기를 처리할 수 없는 경우에만 이것을 설정하세요.   
+이는 `mini_batch_size`의 반대이므로 계산 속도가 느려질 것입니다.
+
+### Embedding의 저장 모드 설정
+
+설정해야 하는 또 다른 주요 매개변수는 `ModelTrainer`의 `train()` 메서드에 있는 `embeddings_storage_mode`입니다.   
+다음 세 가지 값 중 하나를 가질 수 있습니다:
+
+1. **'none'**: `embeddings_storage_mode='none'`으로 설정하면 임베딩이 메모리에 저장되지 않습니다. 대신 (**훈련** 동안) 각 훈련 미니 배치에서 즉석에서 생성됩니다. 주요한 이점은 메모리 요구 사항을 낮게 유지한다는 것입니다. 변환기(transformer)를 미세 조정하는 경우 항상 이것을 설정하세요.
+   
+   
+
+2. **'cpu'**: `embeddings_storage_mode='cpu'`를 설정하면 임베딩이 일반 메모리에 저장될 것입니다.
+
+* during *training*: 임베딩은 첫 번째 epoch에서만 계산되고 그 후에는 메모리에서 검색되기 때문에 많은 경우에 속도가 크게 빨라집니다. 이것의 단점은 메모리 요구 사항이 증가한다는 것입니다. 데이터셋의 크기와 메모리 설정에 따라 이 옵션이 불가능할 수 있습니다.
+* during *inference*: GPU 메모리에서 일반 메모리로 임베딩을 이동해야 하므로 GPU와 함께 사용할 때 추론 속도가 느려집니다. 추론 중에 이 옵션을 사용하는 유일한 이유는 예측뿐만 아니라 예측 후 임베딩도 사용하기 위해서입니다.
+
+3. **'gpu'**: `embeddings_storage_mode='gpu'`로 설정하면 임베딩은 CUDA 메모리에 저장될 것입니다. 이는 CPU에서 CUDA로 텐서를 계속해서 섞을 필요가 없기 때문에 가장 빠른 경우가 많습니다. 물론 CUDA 메모리는 종종 제한되어 있어 큰 데이터셋은 CUDA 메모리에 맞지 않습니다. 하지만 데이터셋이 CUDA 메모리에 맞는 경우에는 이 옵션이 가장 빠릅니다.
+   
+
+## Next
+
+훈련 데이터가 없거나 아주 적은 경우 TARS 접근 방식이 가장 적합할 수 있습니다.   
+[TUTORIAL_10_TRAINING_ZERO_SHOT_MODEL](/docs/TUTORIAL_10_TRAINING_ZERO_SHOT_MODEL.md): Few-shot and zero-shot classification에 대한 TARS 튜토리얼을 확인하세요.
+
+또는   
+[TUTORIAL_9_TRAINING_LM_EMBEDDINGS](/docs/TUTORIAL_9_TRAINING_LM_EMBEDDINGS.md): Training your own embeddings을 살펴보세요.

From c0666e1a79a4bad2286cf17c3cfbe82f2a9673b9 Mon Sep 17 00:00:00 2001
From: Taehyuny <31824443+Taehyuny@users.noreply.github.com>
Date: Sat, 13 Nov 2021 13:36:04 +0900
Subject: [PATCH 08/30] Create TUTORIAL_8_MODEL_OPTIMIZATION.md

---
 .../KOR_docs/TUTORIAL_8_MODEL_OPTIMIZATION.md | 146 ++++++++++++++++++
 1 file changed, 146 insertions(+)
 create mode 100644 resources/docs/KOR_docs/TUTORIAL_8_MODEL_OPTIMIZATION.md

diff --git a/resources/docs/KOR_docs/TUTORIAL_8_MODEL_OPTIMIZATION.md b/resources/docs/KOR_docs/TUTORIAL_8_MODEL_OPTIMIZATION.md
new file mode 100644
index 000000000..962cca443
--- /dev/null
+++ b/resources/docs/KOR_docs/TUTORIAL_8_MODEL_OPTIMIZATION.md
@@ -0,0 +1,146 @@
+# Tutorial 8: Model Tuning
+
+튜토리얼 8에서는 올바른 모델 및 하이퍼 매개변수 셋을 선택하여 모델의 품질을 향상시킬 수 있는 방법을 살펴볼 것입니다.
+
+## Hyper Parameter 선택하기
+
+Flair에는 잘 알려진 하이퍼 매개변수 선택 도구인 [hyperopt](https://github.com/hyperopt/hyperopt)에 대한 래퍼가 포함되어 있습니다.
+
+먼저 말뭉치를 로드해야 합니다. 다음 예에서 사용된 [AGNews corpus](https://www.di.unipi.it/~gulli/AG_corpus_of_news_articles.html)를 로드하려면 
+먼저 다운로드하여 올바른 형식으로 변환하세요.   
+자세한 내용은 [tutorial 6](/docs/TUTORIAL_6_CORPUS.md)을 확인하세요.
+
+```python
+from flair.datasets import TREC_6
+# load your corpus
+corpus = TREC_6()
+```
+
+두 번째로는 매개변수의 검색 공간을 정의해야 합니다. 이를 통해 hyperopt에서 정의한 모든 [parameter expressions](https://github.com/hyperopt/hyperopt/wiki/FMin#21-parameter-expressions)을 사용할 수 있습니다.
+
+```python
+from hyperopt import hp
+from flair.hyperparameter.param_selection import SearchSpace, Parameter
+# define your search space
+search_space = SearchSpace()
+search_space.add(Parameter.EMBEDDINGS, hp.choice, options=[
+    [ WordEmbeddings('en') ], 
+    [ FlairEmbeddings('news-forward'), FlairEmbeddings('news-backward') ]
+])
+search_space.add(Parameter.HIDDEN_SIZE, hp.choice, options=[32, 64, 128])
+search_space.add(Parameter.RNN_LAYERS, hp.choice, options=[1, 2])
+search_space.add(Parameter.DROPOUT, hp.uniform, low=0.0, high=0.5)
+search_space.add(Parameter.LEARNING_RATE, hp.choice, options=[0.05, 0.1, 0.15, 0.2])
+search_space.add(Parameter.MINI_BATCH_SIZE, hp.choice, options=[8, 16, 32])
+```
+
+Attention: 항상 검색 공간에 임베딩을 추가해야 합니다(위 예시 참조). 다른 종류의 임베딩을 테스트하지 않으려면 검색 공간에 하나의 임베딩 옵션만 전달하면 됩니다. 그러면 모든 테스트 실행에서 사용될 것입니다. 
+다음 예시를 참조하세요.
+
+```python
+search_space.add(Parameter.EMBEDDINGS, hp.choice, options=[
+    [ FlairEmbeddings('news-forward'), FlairEmbeddings('news-backward') ]
+])
+```
+
+마지막 단계에서 실제 매개변수 선택기를 생성해야 합니다.
+작업에 따라 `TextClassifierParamSelector` 또는 `SequenceTaggerParamSelector`를 정의하고 최적화를 시작해야 합니다.
+hyperopt가 수행해야 하는 최대 평가 실행 횟수를 정의할 수 있습니다(`max_evals`). 평가 실행은 지정된 수의 epoch(`max_epochs`)를 수행합니다.
+평가 점수에 노이즈가 있는 문제를 극복하기 위해 평가 실행에서 마지막 세 평가 점수('dev_score' 또는 'dev_loss')에 대한 평균을 취합니다. 이 점수는 최종 점수를 나타내며 hyperopt에 전달됩니다.
+또한 평가 실행당 실행 횟수(`training_runs`)를 지정할 수 있습니다.
+둘 이상의 훈련 실행을 지정하는 경우 하나의 평가 실행이 지정된 횟수만큼 실행됩니다.
+최종 평가 점수는 모든 실행에 대한 평균이 됩니다.
+
+```python
+from flair.hyperparameter.param_selection import TextClassifierParamSelector, OptimizationValue
+# create the parameter selector
+param_selector = TextClassifierParamSelector(
+    corpus, 
+    False, 
+    'resources/results', 
+    'lstm',
+    max_epochs=50, 
+    training_runs=3,
+    optimization_value=OptimizationValue.DEV_SCORE
+)
+# start the optimization
+param_selector.optimize(search_space, max_evals=100)
+```
+
+매개변수 설정 및 평가 점수는 결과 디렉토리의 'param_selection.txt'에 기록됩니다.
+최상의 매개변수 조합을 선택하는 동안 어떤 모델도 디스크에 저장하지 않으며 테스트 실행 또한 수행하지 않습니다.
+훈련 중에 로깅 목적으로 테스트 세트에 대한 훈련 후 모델을 한 번만 평가합니다.
+
+## 최고의 학습률 찾기
+
+
+학습률은 가장 중요한 하이퍼 매개변수 중 하나이며 기본적으로 모델의 아키텍처와 모델이 사용하는 교육 데이터를 통한 손실 환경의 토폴로지에 따라 다릅니다.   
+최적의 학습률은 훈련 속도를 향상시키고 더 나은 성능의 모델을 제공할 것입니다. Leslie Smith가
+[Cyclical Learning Rates for Training](https://arxiv.org/abs/1506.01186) 논문에서 설명한 간단한 기법은 매우 낮은 학습률로 시작하여 
+SGD의 모든 배치 업데이트마다 학습률을 기하급수적으로 증가시키면서 모델을 학습시키는 것입니다. 우리는 손실을 플로팅하여
+학습률과 관련하여 일반적으로 세 가지 별개의 단계를 관찰할 것입니다:   
+낮은 학습률의 경우 손실이 개선되지 않으며, 손실이 가장 급격하게 떨어지는 최적의 학습률 범위와 학습률이 너무 커지면 손실이 폭발하는 최종 단계입니다. 
+이러한 플롯을 사용하면 최적의 학습률을 선택하는 것이 최적의 단계에서 가장 높은 것을 선택하는 것만큼 쉽습니다. 
+
+In order to run such an experiment start with your initialized `ModelTrainer` and call `find_learning_rate()` with the
+`base_path` and the file name in which to record the learning rates and losses. Then plot the generated results via the
+`Plotter`'s `plot_learning_rate()` function and have a look at the `learning_rate.png` image to select the optimal
+learning rate:
+
+이러한 실험을 실행하려면 초기화된 'ModelTrainer'로 시작하고 학습률과 손실을 기록할 파일 이름과 'base_path'와 함께 'find_learning_rate()'를 호출하십시오. 
+그런 다음 `Plotter`의 `plot_learning_rate()` 함수를 통해 생성된 결과를 플롯하고 `learning_rate.png` 이미지를 보고 최적의 학습률을 선택하세요:
+
+```python
+from flair.datasets import WNUT_17
+from flair.embeddings import TokenEmbeddings, WordEmbeddings, StackedEmbeddings
+from flair.trainers import ModelTrainer
+from typing import List
+# 1. get the corpus
+corpus = WNUT_17().downsample(0.1)
+print(corpus)
+# 2. what tag do we want to predict?
+tag_type = 'ner'
+# 3. make the tag dictionary from the corpus
+tag_dictionary = corpus.make_tag_dictionary(tag_type=tag_type)
+print(tag_dictionary.idx2item)
+# 4. initialize embeddings
+embedding_types: List[TokenEmbeddings] = [
+    WordEmbeddings('glove'),
+]
+embeddings: StackedEmbeddings = StackedEmbeddings(embeddings=embedding_types)
+# 5. initialize sequence tagger
+from flair.models import SequenceTagger
+tagger: SequenceTagger = SequenceTagger(hidden_size=256,
+                                        embeddings=embeddings,
+                                        tag_dictionary=tag_dictionary,
+                                        tag_type=tag_type,
+                                        use_crf=True)
+# 6. initialize trainer
+trainer: ModelTrainer = ModelTrainer(tagger, corpus)
+# 7. find learning rate
+learning_rate_tsv = trainer.find_learning_rate('resources/taggers/example-ner',
+                                                    'learning_rate.tsv')
+# 8. plot the learning rate finder curve
+from flair.visual.training_curves import Plotter
+plotter = Plotter()
+plotter.plot_learning_rate(learning_rate_tsv)
+```
+
+## Custom Optimizers
+
+이제 'ModelTrainer'를 초기화할 때 PyTorch의 최적화 프로그램을 훈련에 사용할 수 있습니다. 옵티마이저에 추가 옵션을 제공하려면 `weight_decay` 예제와 같이 지정하기만 하면 됩니다:
+
+```python
+from torch.optim.adam import Adam
+trainer = ModelTrainer(tagger, corpus,
+                       optimizer=Adam)
+                                     
+trainer.train(
+    "resources/taggers/example",
+    weight_decay=1e-4
+)
+```
+
+## Next
+
+다음 튜토리얼에서는 [training your own embeddings](/docs/TUTORIAL_9_TRAINING_LM_EMBEDDINGS.md)에 대해 살펴볼 것입니다.

From c538d528b2c1a408e04c92fa0a753a1063f2ed80 Mon Sep 17 00:00:00 2001
From: Taehyuny <31824443+Taehyuny@users.noreply.github.com>
Date: Sat, 13 Nov 2021 13:36:21 +0900
Subject: [PATCH 09/30] Create TUTORIAL_9_TRAINING_LM_EMBEDDINGS.md

---
 .../TUTORIAL_9_TRAINING_LM_EMBEDDINGS.md      | 187 ++++++++++++++++++
 1 file changed, 187 insertions(+)
 create mode 100644 resources/docs/KOR_docs/TUTORIAL_9_TRAINING_LM_EMBEDDINGS.md

diff --git a/resources/docs/KOR_docs/TUTORIAL_9_TRAINING_LM_EMBEDDINGS.md b/resources/docs/KOR_docs/TUTORIAL_9_TRAINING_LM_EMBEDDINGS.md
new file mode 100644
index 000000000..a675a7d79
--- /dev/null
+++ b/resources/docs/KOR_docs/TUTORIAL_9_TRAINING_LM_EMBEDDINGS.md
@@ -0,0 +1,187 @@
+# Tutorial 9: 여러분만의 Flair 임베딩을 훈련하세요!
+
+Flair Embeddings는 Flair의 비밀 소스이며, 이를 통해 다양한 NLP 작업에서 최첨단 정확도를 달성할 수 있습니다.
+이 튜토리얼에서는 자신만의 Flair 임베딩을 훈련하는 방법을 알려줄 것입니다. 이는 Flair를 새로운 언어나 도메인에 적용하려는 경우에 유용할 수 있습니다.
+
+
+## 텍스트 말뭉치(Corpus) 준비
+
+언어 모델은 일반 텍스트로 학습됩니다. 문자 LM의 경우 문자 시퀀스에서 다음 문자를 예측하도록 훈련합니다.
+자신의 모델을 학습시키려면 먼저 적절하게 큰 말뭉치를 식별해야 합니다. 실험에서는 약 10억 개의 단어가 있는 말뭉치를 사용했습니다.
+
+코퍼스를 학습, 검증 및 테스트 부분으로 분할해야 합니다.
+우리의 트레이너 클래스는 테스트 및 검증 데이터가 있는 'test.txt'와 'valid.txt'가 있는 코퍼스용 폴더가 있다고 가정하고 있습니다.
+중요한 것은 분할된 훈련 데이터를 포함하는 'train'이라는 폴더도 있다는 것입니다.
+예를 들어, 10억 단어 코퍼스는 100개 부분으로 나뉩니다.
+모든 데이터가 메모리에 맞지 않는 경우 분할이 필요합니다. 이 경우 트레이너는 모든 분할을 무작위로 반복합니다.
+
+따라서 폴더 구조는 다음과 같아야 합니다:
+
+```
+corpus/
+corpus/train/
+corpus/train/train_split_1
+corpus/train/train_split_2
+corpus/train/...
+corpus/train/train_split_X
+corpus/test.txt
+corpus/valid.txt
+```
+
+대부분의 경우 문서나 문장에 대한 명확한 구분 기호가 없는 구조화되지 않은 형식으로 말뭉치를 제공하는 것이 좋습니다. LM이 문서 경계를 더 쉽게 식별할 수 있도록 하려면 "[SEP]"와 같은 구분 토큰을 도입할 수 있습니다.
+
+## 언어 모델 훈련
+
+이 폴더 구조가 있으면 `LanguageModelTrainer` 클래스를 이 폴더 구조로 지정하여 모델 학습을 시작하세요.
+
+```python
+from flair.data import Dictionary
+from flair.models import LanguageModel
+from flair.trainers.language_model_trainer import LanguageModelTrainer, TextCorpus
+# are you training a forward or backward LM?
+is_forward_lm = True
+# load the default character dictionary
+dictionary: Dictionary = Dictionary.load('chars')
+# get your corpus, process forward and at the character level
+corpus = TextCorpus('/path/to/your/corpus',
+                    dictionary,
+                    is_forward_lm,
+                    character_level=True)
+# instantiate your language model, set hidden size and number of layers
+language_model = LanguageModel(dictionary,
+                               is_forward_lm,
+                               hidden_size=128,
+                               nlayers=1)
+# train your language model
+trainer = LanguageModelTrainer(language_model, corpus)
+trainer.train('resources/taggers/language_model',
+              sequence_length=10,
+              mini_batch_size=10,
+              max_epochs=10)
+```
+
+이 스크립트의 매개변수는 매우 작습니다. hidden size(은닉 크기)는 1024 또는 2048, 시퀀스 길이는 250, 미니 배치 크기는 100으로 좋은 결과를 얻었습니다.
+리소스에 따라 대규모 모델을 훈련할 수 있지만 모델을 훈련하는 데 매우 강력한 GPU와 많은 시간이 필요하다는 점에 유의하십시오. (1주 이상 훈련)
+
+
+
+## LM을 임베딩으로 사용
+
+LM을 학습하면 임베딩으로 사용하기 쉽습니다. 모델을 `FlairEmbeddings` 클래스에 로드하고 Flair의 다른 임베딩처럼 사용하세요:
+
+```python
+sentence = Sentence('I love Berlin')
+# init embeddings from your trained LM
+char_lm_embeddings = FlairEmbeddings('resources/taggers/language_model/best-lm.pt')
+# embed sentence
+char_lm_embeddings.embed(sentence)
+```
+
+끝입니다!
+
+
+## 라틴어가 아닌 알파벳
+
+아랍어나 일본어와 같은 비라틴어 알파벳을 사용하는 언어에 대한 임베딩을 훈련하는 경우 먼저 고유한 문자 사전을 만들어야 합니다. 다음 코드로 이 작업을 수행할 수 있습니다:
+
+```python
+# make an empty character dictionary
+from flair.data import Dictionary
+char_dictionary: Dictionary = Dictionary()
+# counter object
+import collections
+counter = collections.Counter()
+processed = 0
+import glob
+files = glob.glob('/path/to/your/corpus/files/*.*')
+print(files)
+for file in files:
+    print(file)
+    with open(file, 'r', encoding='utf-8') as f:
+        tokens = 0
+        for line in f:
+            processed += 1            
+            chars = list(line)
+            tokens += len(chars)
+            # Add chars to the dictionary
+            counter.update(chars)
+            # comment this line in to speed things up (if the corpus is too large)
+            # if tokens > 50000000: break
+    # break
+total_count = 0
+for letter, count in counter.most_common():
+    total_count += count
+print(total_count)
+print(processed)
+sum = 0
+idx = 0
+for letter, count in counter.most_common():
+    sum += count
+    percentile = (sum / total_count)
+    # comment this line in to use only top X percentile of chars, otherwise filter later
+    # if percentile < 0.00001: break
+    char_dictionary.add_item(letter)
+    idx += 1
+    print('%d\t%s\t%7d\t%7d\t%f' % (idx, letter, count, sum, percentile))
+print(char_dictionary.item2idx)
+import pickle
+with open('/path/to/your_char_mappings', 'wb') as f:
+    mappings = {
+        'idx2item': char_dictionary.idx2item,
+        'item2idx': char_dictionary.item2idx
+    }
+    pickle.dump(mappings, f)
+```
+
+그런 다음 언어 모델 학습을 위해 코드의 기본 사전 대신 이 사전을 사용할 수 있습니다.
+
+```python
+import pickle
+dictionary = Dictionary.load_from_file('/path/to/your_char_mappings')
+```
+
+## 파라미터
+
+우리는 `LanguageModelTrainer`의 일부 학습 매개변수를 가지고 놀 수 있습니다.
+예를 들어, 우리는 일반적으로 초기 학습률이 20이고 annealing 계수 4가 대부분의 말뭉치에 대해 꽤 좋은 것을 알 수 있습니다.
+학습률 스케줄러의 '인내' 값을 수정할 수도 있습니다. 현재 25개로 설정되어 있습니다. 즉, 25개의 분할에 대해 훈련 손실이 개선되지 않으면 학습률이 감소합니다.
+
+
+## 기존 LM 미세 조정
+
+때로는 처음부터 훈련하는 대신 기존 언어 모델을 미세 조정하는 것이 합리적입니다. 예를 들어, 영어에 대한 일반 LM이 있고 특정 도메인에 대해 미세 조정하려는 경우입니다. 
+
+`LanguageModel`을 미세 조정하려면 새 인스턴스를 생성하는 대신 기존 `LanguageModel`을 로드하기만 하면 됩니다. 나머지 훈련 코드는 위와 동일하게 유지됩니다.
+
+
+```python
+from flair.data import Dictionary
+from flair.embeddings import FlairEmbeddings
+from flair.trainers.language_model_trainer import LanguageModelTrainer, TextCorpus
+# instantiate an existing LM, such as one from the FlairEmbeddings
+language_model = FlairEmbeddings('news-forward').lm
+# are you fine-tuning a forward or backward LM?
+is_forward_lm = language_model.is_forward_lm
+# get the dictionary from the existing language model
+dictionary: Dictionary = language_model.dictionary
+# get your corpus, process forward and at the character level
+corpus = TextCorpus('path/to/your/corpus',
+                    dictionary,
+                    is_forward_lm,
+                    character_level=True)
+# use the model trainer to fine-tune this model on your corpus
+trainer = LanguageModelTrainer(language_model, corpus)
+trainer.train('resources/taggers/language_model',
+              sequence_length=100,
+              mini_batch_size=100,
+              learning_rate=20,
+              patience=10,
+              checkpoint=True)
+```              
+              
+미세조정 시에는 전과 동일한 문자사전을 사용해야 하며 방향(앞/뒤)을 복사해야 합니다.
+
+
+## LM에 기여해보세요!
+
+아직 Flair에 없는 언어나 도메인에 대해 우수한 LM을 훈련하고 있다면 저희에게 연락해주세요. 다른 사람들이 사용할 수 있도록 더 많은 LM을 라이브러리에 통합하게 되어 기쁩니다!

From e02529f4ad6a618b9b2655e161ae27c03be5e4ef Mon Sep 17 00:00:00 2001
From: Taehyuny <31824443+Taehyuny@users.noreply.github.com>
Date: Sat, 13 Nov 2021 13:36:41 +0900
Subject: [PATCH 10/30] Create TUTORIAL_10_TRAINING_ZERO_SHOT_MODEL.md

---
 .../TUTORIAL_10_TRAINING_ZERO_SHOT_MODEL.md   | 210 ++++++++++++++++++
 1 file changed, 210 insertions(+)
 create mode 100644 resources/docs/KOR_docs/TUTORIAL_10_TRAINING_ZERO_SHOT_MODEL.md

diff --git a/resources/docs/KOR_docs/TUTORIAL_10_TRAINING_ZERO_SHOT_MODEL.md b/resources/docs/KOR_docs/TUTORIAL_10_TRAINING_ZERO_SHOT_MODEL.md
new file mode 100644
index 000000000..d18a313a1
--- /dev/null
+++ b/resources/docs/KOR_docs/TUTORIAL_10_TRAINING_ZERO_SHOT_MODEL.md
@@ -0,0 +1,210 @@
+# Tutorial 10: Few-Shot과 Zero-Shot 분류 (TARS)
+
+TARS(Task-aware representation of sentence)는 [Halder et al. (2020)](https://kishaloyhalder.github.io/pdfs/tars_coling2020.pdf)이 **텍스트 분류를 위한 퓨샷 및 제로샷 학습**을 위한 간단하고 효과적인 방법으로 도입했습니다.
+이것은 훈련 예제 없이도 텍스트를 분류할 수 있음을 의미합니다.
+이 모델은 Flair에서 `TARSClassifier` 클래스로 구현됩니다.
+ 
+이번 튜토리얼에서는 TARS를 사용하는 다양한 방법을 보여줄 것입니다:
+
+    
+## Use Case #1: 훈련 데이터 없이 텍스트 분류(Zero-Shot)
+
+때로 우리는 해결하려는 텍스트 분류 작업에 대한 훈련 데이터가 없을 때가 있습니다. 이 경우 기본 TARS 모델을 로드하고 제로샷 예측을 수행할 수 있습니다.   
+즉, TARS의 `predict_zero_shot` 메서드를 사용하고 레이블 이름 목록을 제공하는 것입니다. 그런 다음 TARS는 이러한 레이블 중 하나를 텍스트와 일치시키려고 시도할 것입니다.
+
+예를 들어 텍스트가 "행복"인지 "슬픔"인지 예측하고 싶지만 이에 대한 훈련 데이터가 없다고 가정해 보겠습니다.
+이 스니펫과 함께 TARS를 사용하기만 하면 됩니다:
+
+```python
+from flair.models import TARSClassifier
+from flair.data import Sentence
+# 1. Load our pre-trained TARS model for English
+tars = TARSClassifier.load('tars-base')
+# 2. Prepare a test sentence
+sentence = Sentence("I am so glad you liked it!")
+# 3. Define some classes that you want to predict using descriptive names
+classes = ["happy", "sad"]
+# 4. Predict for these classes
+tars.predict_zero_shot(sentence, classes)
+# Print sentence with predicted labels
+print(sentence)
+```
+
+출력은 다음과 같습니다:   
+
+```console
+Sentence: "I am so glad you liked it !"   [− Tokens: 8  − Sentence-Labels: {'label': [happy (0.9312)]}]
+```
+
+이 문장에는 "happy"라는 레이블이 선택되었습니다.
+
+다른 라벨과 함께 사용해 보세요! 제로샷 예측은 때때로 (*항상 그런 것은 아니지만*) 매우 잘 작동합니다.
+
+## Use Case #2: TARS를 사용한 제로샷 NER(Named Entity Recognition)
+
+TARS 제로샷 학습 접근 방식을 시퀀스 라벨링으로 확장하고 영어 NER에 대해 사전 훈련된 모델을 제공합니다. 일부 클래스를 정의하고 모델이 클래스를 찾을 수 있는지 확인하세요:
+
+```python
+from flair.models import TARSTagger
+from flair.data import Sentence
+# 1. Load zero-shot NER tagger
+tars = TARSTagger.load('tars-ner')
+# 2. Prepare some test sentences
+sentences = [
+    Sentence("The Humboldt University of Berlin is situated near the Spree in Berlin, Germany"),
+    Sentence("Bayern Munich played against Real Madrid"),
+    Sentence("I flew with an Airbus A380 to Peru to pick up my Porsche Cayenne"),
+    Sentence("Game of Thrones is my favorite series"),
+]
+# 3. Define some classes of named entities such as "soccer teams", "TV shows" and "rivers"
+labels = ["Soccer Team", "University", "Vehicle", "River", "City", "Country", "Person", "Movie", "TV Show"]
+tars.add_and_switch_to_new_task('task 1', labels, label_type='ner')
+# 4. Predict for these classes and print results
+for sentence in sentences:
+    tars.predict(sentence)
+    print(sentence.to_tagged_string("ner"))
+```
+
+다음과 같이 출력될 것입니다:
+
+```console
+The Humboldt <B-University> University <I-University> of <I-University> Berlin <E-University> is situated near the Spree <S-River> in Berlin <S-City> , Germany <S-Country>
+
+Bayern <B-Soccer Team> Munich <E-Soccer Team> played against Real <B-Soccer Team> Madrid <E-Soccer Team>
+
+I flew with an Airbus <B-Vehicle> A380 <E-Vehicle> to Peru <S-City> to pick up my Porsche <B-Vehicle> Cayenne <E-Vehicle>
+
+Game <B-TV Show> of <I-TV Show> Thrones <E-TV Show> is my favorite series
+```
+
+
+따라서 이 예제에서는 모델이 이에 대해 명시적으로 훈련된 적이 없음에도 불구하고 "TV show" (_왕좌의 게임_), "vehicle" (_Airbus A380_ and _Porsche Cayenne_),
+"soccer team" (_Bayern Munich_ and _Real Madrid_) 및 "river" (_Spree_) 와 같은 엔터티 클래스를 찾고 있습니다.
+이는 진행중인 연구이며 예제는 약간 cherry-picked 된 것입니다. 제로샷 모델은 다음 릴리스까지 상당히 개선될 것으로 기대합니다.
+
+## Use Case #3: TARS 모델 학습 
+
+또한 처음부터 또는 제공된 TARS 모델을 시작점으로 사용하여 고유한 TARS 모델을 훈련할 수 있습니다. 후자를 선택한 경우 새 작업을 훈련하는 데 아주 적은 양의 훈련 데이터만 필요할 수 있습니다.
+
+### 하나의 데이터셋으로 학습하는 방법
+
+하나의 데이터 세트로 훈련하는 것은 Flair에서 다른 모델을 훈련하는 것과 거의 동일합니다. 유일한 차이점은 레이블 이름을 자연어 설명으로 바꾸는 것이 때때로 의미가 있다는 것입니다.
+예를 들어, TREC 데이터 세트는 "엔티티에 대한 질문"으로 바꿔 말하는 "ENTY"와 같은 레이블을 정의합니다. 더 나은 설명은 TARS가 배우는 데 도움이 됩니다.
+
+전체 훈련 코드는 다음과 같습니다:
+
+```python
+from flair.data import Corpus
+from flair.datasets import TREC_6
+from flair.models import TARSClassifier
+from flair.trainers import ModelTrainer
+# 1. define label names in natural language since some datasets come with cryptic set of labels
+label_name_map = {'ENTY': 'question about entity',
+                  'DESC': 'question about description',
+                  'ABBR': 'question about abbreviation',
+                  'HUM': 'question about person',
+                  'NUM': 'question about number',
+                  'LOC': 'question about location'
+                  }
+# 2. get the corpus
+corpus: Corpus = TREC_6(label_name_map=label_name_map)
+# 3. what label do you want to predict?
+label_type = 'question_class'
+# 4. make a label dictionary
+label_dict = corpus.make_label_dictionary(label_type=label_type)
+# 5. start from our existing TARS base model for English
+tars = TARSClassifier.load("tars-base")
+# 5a: alternatively, comment out previous line and comment in next line to train a new TARS model from scratch instead
+# tars = TARSClassifier(embeddings="bert-base-uncased")
+# 6. switch to a new task (TARS can do multiple tasks so you must define one)
+tars.add_and_switch_to_new_task(task_name="question classification",
+                                label_dictionary=label_dict,
+                                label_type=label_type,
+                                )
+# 7. initialize the text classifier trainer
+trainer = ModelTrainer(tars, corpus)
+# 8. start the training
+trainer.train(base_path='resources/taggers/trec',  # path to store the model artifacts
+              learning_rate=0.02,  # use very small learning rate
+              mini_batch_size=16,
+              mini_batch_chunk_size=4,  # optionally set this if transformer is too much for your machine
+              max_epochs=1,  # terminate after 1 epoch
+              )
+```
+
+이 스크립트는 TARS 기반 모델에서 시작하므로 몇 에포크면 충분합니다. 그러나 대신 처음부터 새로운 TARS 모델을 훈련하면
+(위의 코드 스니펫의 5a단계 참조) 10 또는 20 Epoch 동안 훈련하고 싶을 것입니다.
+
+
+### 여러 데이터셋으로 학습하는 방법
+
+TARS는 하나 이상의 분류 작업에서 학습하면 퓨샷 및 제로샷 예측에서 더 좋아집니다.
+
+예를 들어 GO_EMOTIONS 데이터 세트를 사용하여 TREC_6에 대해 훈련한 모델을 계속 훈련해 보겠습니다. 코드는 다시 매우 유사해 보입니다. 새 데이터 세트를 학습하기 직전에 `add_and_switch_to_new_task`를 호출해야 합니다.
+이렇게 하면 모델이 이제 TREC_6 대신 GO_EMOTIONS를 훈련해야 함을 알 수 있습니다:
+
+```python
+from flair.datasets import GO_EMOTIONS
+from flair.models import TARSClassifier
+from flair.trainers import ModelTrainer
+# 1. Load the trained model
+tars = TARSClassifier.load('resources/taggers/trec/best-model.pt')
+# 2. load a new flair corpus e.g., GO_EMOTIONS, SENTIMENT_140 etc
+new_corpus = GO_EMOTIONS()
+# 3. define label type
+label_type = "emotion"
+# 4. make a label dictionary
+label_dict = new_corpus.make_label_dictionary(label_type=label_type)
+# 5. IMPORTANT: switch to new task
+tars.add_and_switch_to_new_task("GO_EMOTIONS",
+                                label_dictionary=label_dict,
+                                label_type=label_type)
+# 6. initialize the text classifier trainer
+trainer = ModelTrainer(tars, new_corpus)
+# 7. start the training
+trainer.train(base_path='resources/taggers/go_emotions', # path to store the model artifacts
+              learning_rate=0.02, # use very small learning rate
+              mini_batch_size=16,
+              mini_batch_chunk_size=4, # optionally set this if transformer is too much for your machine
+              max_epochs=10, # terminate after 10 epochs
+              )
+```
+
+이 훈련이 끝나면 결과 모델은 TREC_6 및 GO_EMOTIONS 모두에 대해 고품질 예측을 수행할 수 있으며 이전보다 퓨샷(few-shot) 학습을 위한 더 나은 기반이 됩니다.
+
+
+
+## 작업 간 전환
+
+TARS는 레이블 이름과 기본 언어 모델의 텍스트 간의 관계를 캡슐화할 수 있습니다. 위와 같이 여러 말뭉치에 대해 단일 모델을 학습할 수 있습니다. 
+이것은 편의를 위해 내부적으로 레이블 집합을 다른 작업으로 그룹화합니다. 사용자는 TARS 모델이 훈련된 기존 작업을 조회한 다음 필요에 따라 그 중 하나로 전환할 수 있습니다.
+
+```python
+# 1. Load a pre-trained TARS model
+tars = TARSClassifier.load('tars-base')
+# 2. Check out what datasets it was trained on
+existing_tasks = tars.list_existing_tasks()
+print(f"Existing tasks are: {existing_tasks}")
+# 3. Switch to a particular task that exists in the above list
+tars.switch_to_task("GO_EMOTIONS")
+# 4. Prepare a test sentence
+sentence = Sentence("I absolutely love this!")
+tars.predict(sentence)
+print(sentence)
+```
+출력은 다음과 같습니다: 
+```
+Existing tasks are: {'AGNews', 'DBPedia', 'IMDB', 'SST', 'TREC_6', 'NEWS_CATEGORY', 'Amazon', 'Yelp', 'GO_EMOTIONS'}
+Sentence: "I absolutely love this !"   [− Tokens: 5  − Sentence-Labels: {'label': [LOVE (0.9708)]}]
+```
+
+## TARS 사용 시 다음 논문을 인용하십시오:
+
+```
+@inproceedings{halder2020coling,
+  title={Task Aware Representation of Sentences for Generic Text Classification},
+  author={Halder, Kishaloy and Akbik, Alan and Krapac, Josip and Vollgraf, Roland},
+  booktitle = {{COLING} 2020, 28th International Conference on Computational Linguistics},
+  year      = {2020}
+}
+```

From 3fa840ea1f9e220fe7c691158f30139a03c647f1 Mon Sep 17 00:00:00 2001
From: Taehyuny <31824443+Taehyuny@users.noreply.github.com>
Date: Sat, 13 Nov 2021 13:39:37 +0900
Subject: [PATCH 11/30] Create README.md

---
 resources/docs/KOR_docs/README.md | 209 ++++++++++++++++++++++++++++++
 1 file changed, 209 insertions(+)
 create mode 100644 resources/docs/KOR_docs/README.md

diff --git a/resources/docs/KOR_docs/README.md b/resources/docs/KOR_docs/README.md
new file mode 100644
index 000000000..b48c41c26
--- /dev/null
+++ b/resources/docs/KOR_docs/README.md
@@ -0,0 +1,209 @@
+# doc_flairNLP
+flairNLP를 한국어로 이해하기 쉽게 번역한 튜토리얼 번역본입니다.    
+
+
+![alt text](https://github.com/flairNLP/flair/blob/master/resources/docs/flair_logo_2020.png?raw=true)
+
+[![PyPI version](https://badge.fury.io/py/flair.svg)](https://badge.fury.io/py/flair)
+[![GitHub Issues](https://img.shields.io/github/issues/flairNLP/flair.svg)](https://github.com/flairNLP/flair/issues)
+[![Contributions welcome](https://img.shields.io/badge/contributions-welcome-brightgreen.svg)](CONTRIBUTING.md)
+[![License: MIT](https://img.shields.io/badge/License-MIT-brightgreen.svg)](https://opensource.org/licenses/MIT)
+
+**최첨단 NLP**를 위한 매우 간단한 프레임워크입니다. 
+[Humboldt University of Berlin](https://www.informatik.hu-berlin.de/en/forschung-en/gebiete/ml-en/) 및 친구들에 의해 개발되었습니다.
+
+---
+
+Flair는:
+
+* **강력한 NLP 라이브러리입니다.**    
+ Flair를 사용하면 명명된 개체 인식(NER), 품사 태깅(PoS), [생체 의학 데이터](https://github.com/flairNLP/flair/blob/94393aa82444f28c5a1da6057b8ff57b3cb390e6/resources/docs/HUNFLAIR.md)에 대한 특별 지원과 같은 최첨단 자연어 처리(NLP) 모델을 텍스트에 적용할 수 있습니다.
+ 또한 의미 중의성 해소(sense disambiguation)와 분류를 지원하며, 지원하는 언어의 수도 빠르게 늘어나고 있습니다.
+
+* **텍스트 임베딩 라이브러리입니다.**    
+Flair에는 제안된 **[Flair embeddings](https://www.aclweb.org/anthology/C18-1139/)**, BERT 임베딩 및 ELMo 임베딩을 포함하여 다양한 단어 및 문서 임베딩을 사용하고 결합할 수 있는 간단한 인터페이스가 있습니다.
+
+* **파이토치 NLP 프레임워크입니다.**    
+ 우리의 프레임워크는 [PyTorch](https://pytorch.org/)를 기반으로 직접 구축되어 쉽게 자신의 모델을 훈련하고 Flair 임베딩 및 클래스를 사용하여 새로운 접근 방식을 실험할 수 있습니다.
+
+ 이제 [version 0.9](https://github.com/flairNLP/flair/releases)입니다!
+
+
+## Join Us: HU-Berlin에서 채용 공고!
+
+박사 학위 과정으로 NLP/ML 연구를 수행하는 데 관심이 있고 오픈 소스를 사랑한다면, 베를린 훔볼트 대학교의 연구원 및 박사 과정생 [open positions](https://github.com/flairNLP/flair/issues/2446)에 지원하는 것을 고려해 보세요! 현재 **3개의 공석**이 있으며 지원 마감일이 곧 다가옵니다!
+
+## 최첨단 모델
+
+Flair는 다양한 NLP 작업을 위한 최신 모델과 함께 제공됩니다. 예를 들어 최신 NER 모델을 확인해보세요:
+
+| Language | Dataset | Flair | Best published | Model card & demo
+|  ---  | ----------- | ---------------- | ------------- | ------------- |
+| English | Conll-03 (4-class)   |  **94.09**  | *94.3 [(Yamada et al., 2018)](https://doi.org/10.18653/v1/2020.emnlp-main.523)* | [Flair English 4-class NER demo](https://huggingface.co/flair/ner-english-large)  |
+| English | Ontonotes (18-class)  |  **90.93**  | *91.3 [(Yu et al., 2016)](https://www.aclweb.org/anthology/2020.acl-main.577.pdf)* | [Flair English 18-class NER demo](https://huggingface.co/flair/ner-english-ontonotes-large) |
+| German  | Conll-03 (4-class)   |  **92.31**  | *90.3 [(Yu et al., 2016)](https://www.aclweb.org/anthology/2020.acl-main.577.pdf)* | [Flair German 4-class NER demo](https://huggingface.co/flair/ner-german-large)  |
+| Dutch  | Conll-03  (4-class)  |  **95.25**  | *93.7 [(Yu et al., 2016)](https://www.aclweb.org/anthology/2020.acl-main.577.pdf)* | [Flair Dutch 4-class NER demo](https://huggingface.co/flair/ner-dutch-large)  |
+| Spanish  | Conll-03 (4-class)   |  **90.54** | *90.3 [(Yu et al., 2016)](https://www.aclweb.org/anthology/2020.acl-main.577.pdf)* | [Flair Spanish 4-class NER demo](https://huggingface.co/flair/ner-spanish-large)  |
+
+**New:** 
+대부분의 Flair 시퀀스 태깅 모델(명명된 엔티티 인식, 품사 태깅 등)이 이제  [__🤗 HuggingFace model hub__](https://huggingface.co/models?library=flair&sort=downloads)에서 호스팅됩니다! 모델을 검색하고 학습 방법에 대한 자세한 정보를 확인하고 각 모델을 온라인으로 시험해 볼 수도 있습니다!
+
+## Quick Start
+
+### 요구사항 및 설치
+
+이 프로젝트는 PyTorch 1.5+ 및 Python 3.6+를 기반으로 합니다. 메소드 시그니처와 타입 힌트가 아름답기 때문입니다.
+Python 3.6이 없으면 먼저 설치하십시오. [Ubuntu 16.04의 경우](https://vsupalov.com/developing-with-python3-6-on-ubuntu-16-04/).
+그런 다음 선호하는 가상 환경에서 다음을 수행하십시오:
+
+```
+pip install flair
+```
+
+### 사용 예시
+
+예제 문장에 대해 NER(Named Entity Recognition)을 실행해 보겠습니다. 'Sentence'를 만들고 사전 훈련된 모델을 로드하고 이를 사용하여 문장의 태그를 예측하기만 하면 됩니다.
+
+```python
+from flair.data import Sentence
+from flair.models import SequenceTagger
+# make a sentence
+sentence = Sentence('I love Berlin .')
+# load the NER tagger
+tagger = SequenceTagger.load('ner')
+# run NER over sentence
+tagger.predict(sentence)
+```
+
+완료입니다! 이제 'Sentence'에 엔티티 주석이 있습니다. 태거(tagger)가 무엇을 찾았는지 보려면 문장을 출력하세요.
+
+```python
+print(sentence)
+print('The following NER tags are found:')
+# iterate over entities and print
+for entity in sentence.get_spans('ner'):
+    print(entity)
+```
+
+출력은 다음과 같습니다:
+
+```console
+Sentence: "I love Berlin ." - 4 Tokens
+
+The following NER tags are found:
+
+Span [3]: "Berlin"   [− Labels: LOC (0.9992)]
+```
+
+## Tutorials
+
+라이브러리를 시작하는 데 도움이 되는 빠른 튜토리얼 세트를 제공합니다.
+
+* [Tutorial 1: Basics](/docs/TUTORIAL_1_BASICS.md)
+* [Tutorial 2: Tagging your Text](/docs/TUTORIAL_2_TAGGING.md)
+* [Tutorial 3: Embedding Words](/docs/TUTORIAL_3_WORD_EMBEDDING.md)
+* [Tutorial 4: List of All Word Embeddings](/docs/TUTORIAL_4_ELMO_BERT_FLAIR_EMBEDDING.md)
+* [Tutorial 5: Embedding Documents](/docs/TUTORIAL_5_DOCUMENT_EMBEDDINGS.md)
+* [Tutorial 6: Loading a Dataset](/docs/TUTORIAL_6_CORPUS.md)
+* [Tutorial 7: Training a Model](/docs/TUTORIAL_7_TRAINING_A_MODEL.md)
+* [Tutorial 8: Training your own Flair Embeddings](/docs/TUTORIAL_9_TRAINING_LM_EMBEDDINGS.md)
+* [Tutorial 9: Training a Zero Shot Text Classifier (TARS)](/docs/TUTORIAL_10_TRAINING_ZERO_SHOT_MODEL.md)
+
+튜토리얼에서는 기본 NLP 클래스가 작동하는 방법, 사전 훈련된 모델을 로드하여 텍스트에 태그를 지정하는 방법, 다양한 단어 또는 문서 임베딩으로 텍스트를 임베딩하는 방법, 그리고 자신만의 언어 모델, 시퀀스 레이블링 모델 및 텍스트 분류 모델을 훈련하는 방법에 대해 설명하고 있습니다. 불분명한 것이 있으면 알려주세요.
+
+설치 지침 및 자습서가 포함된 **[biomedical NER and datasets](https://github.com/flairNLP/flair/blob/94393aa82444f28c5a1da6057b8ff57b3cb390e6/resources/docs/HUNFLAIR.md)** 전용 랜딩 페이지도 있습니다.
+
+Flair를 사용하는 방법을 보여주는 훌륭한 타사 기사 및 게시물도 있습니다:
+* [How to build a text classifier with Flair](https://towardsdatascience.com/text-classification-with-state-of-the-art-nlp-library-flair-b541d7add21f)
+* [How to build a microservice with Flair and Flask](https://shekhargulati.com/2019/01/04/building-a-sentiment-analysis-python-microservice-with-flair-and-flask/)
+* [A docker image for Flair](https://towardsdatascience.com/docker-image-for-nlp-5402c9a9069e)
+* [Great overview of Flair functionality and how to use in Colab](https://www.analyticsvidhya.com/blog/2019/02/flair-nlp-library-python/)
+* [Visualisation tool for highlighting the extracted entities](https://github.com/lunayach/visNER)
+* [Practical approach of State-of-the-Art Flair in Named Entity Recognition](https://medium.com/analytics-vidhya/practical-approach-of-state-of-the-art-flair-in-named-entity-recognition-46a837e25e6b)
+* [Benchmarking NER algorithms](https://towardsdatascience.com/benchmark-ner-algorithm-d4ab01b2d4c3)
+* [Training a Flair text classifier on Google Cloud Platform (GCP) and serving predictions on GCP](https://github.com/robinvanschaik/flair-on-gcp)
+* [Model Interpretability for transformer-based Flair models](https://github.com/robinvanschaik/interpret-flair)
+
+## Flair 인용하기
+
+Flair 임베딩을 사용할 때 [다음 논문](https://www.aclweb.org/anthology/C18-1139/)을 인용하세요.
+
+```
+@inproceedings{akbik2018coling,
+  title={Contextual String Embeddings for Sequence Labeling},
+  author={Akbik, Alan and Blythe, Duncan and Vollgraf, Roland},
+  booktitle = {{COLING} 2018, 27th International Conference on Computational Linguistics},
+  pages     = {1638--1649},
+  year      = {2018}
+}
+```
+
+실험에 Flair 프레임워크를 사용하는 경우 [이 문서](https://www.aclweb.org/anthology/papers/N/N19/N19-4010/)를 인용하세요:
+
+```
+@inproceedings{akbik2019flair,
+  title={FLAIR: An easy-to-use framework for state-of-the-art NLP},
+  author={Akbik, Alan and Bergmann, Tanja and Blythe, Duncan and Rasul, Kashif and Schweter, Stefan and Vollgraf, Roland},
+  booktitle={{NAACL} 2019, 2019 Annual Conference of the North American Chapter of the Association for Computational Linguistics (Demonstrations)},
+  pages={54--59},
+  year={2019}
+}
+```
+
+Flair 임베딩(PooledFlairEmbeddings)의 풀링 버전을 사용하는 경우 [이 문서](https://www.aclweb.org/anthology/papers/N/N19/N19-1078/)를 인용하세요:
+
+```
+@inproceedings{akbik2019naacl,
+  title={Pooled Contextualized Embeddings for Named Entity Recognition},
+  author={Akbik, Alan and Bergmann, Tanja and Vollgraf, Roland},
+  booktitle = {{NAACL} 2019, 2019 Annual Conference of the North American Chapter of the Association for Computational Linguistics},
+  pages     = {724--728},
+  year      = {2019}
+}
+```
+
+새로운 "FLERT" 모델 또는 접근 방식을 사용하는 경우 [이 문서](https://arxiv.org/abs/2011.06993)를 인용하세요:
+
+```
+@misc{schweter2020flert,
+    title={FLERT: Document-Level Features for Named Entity Recognition},
+    author={Stefan Schweter and Alan Akbik},
+    year={2020},
+    eprint={2011.06993},
+    archivePrefix={arXiv},
+    primaryClass={cs.CL}}
+```
+
+## Contact
+
+질문이나 의견은 [Alan Akbik](http://alanakbik.github.io/)에게 이메일로 보내주세요.
+
+## Contributing
+
+contributing에 관심을 가져주셔서 감사합니다! 참여하는 방법에는 여러 가지가 있습니다.
+[contributor guidelines](https://github.com/flairNLP/flair/blob/94393aa82444f28c5a1da6057b8ff57b3cb390e6/CONTRIBUTING.md)으로 시작한 다음
+특정 작업에 대해서는 [open issues](https://github.com/flairNLP/flair/issues)를 확인하세요.
+
+API에 대해 더 깊이 알고자 하는 기여자의 경우 레포지토리를 복제하고 메서드를 호출하는 방법에 대한 예제를 보려면 단위 테스트를 확인하는 것이 좋습니다. 
+거의 모든 클래스와 메서드가 문서화되어 있으므로 코드를 찾는 것이 쉬울 것입니다.
+
+### 로컬에서 단위 테스트 실행
+
+이것을 위해 [Pipenv](https://pipenv.readthedocs.io/)가 필요합니다:
+
+```bash
+pipenv install --dev && pipenv shell
+pytest tests/
+```
+
+통합 테스트를 실행하려면 다음을 실행하세요:
+```bash
+pytest --runintegration tests/
+```
+통합 테스트는 작은 모델을 훈련합니다.
+그 후에 예측을 위해 훈련된 모델이 로드됩니다.
+
+또한 flair에서 제공하는 임베딩을 로드하고 사용하는 것처럼 느린 테스트를 실행하려면 다음을 실행해야 합니다:
+```bash
+pytest --runslow tests/
+```

From b14835a9369d0c54e353440c0beec4fc7d5b5750 Mon Sep 17 00:00:00 2001
From: Taehyuny <31824443+Taehyuny@users.noreply.github.com>
Date: Sat, 13 Nov 2021 13:40:39 +0900
Subject: [PATCH 12/30] Update README.md

---
 resources/docs/KOR_docs/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/resources/docs/KOR_docs/README.md b/resources/docs/KOR_docs/README.md
index b48c41c26..ada918cef 100644
--- a/resources/docs/KOR_docs/README.md
+++ b/resources/docs/KOR_docs/README.md
@@ -1,4 +1,4 @@
-# doc_flairNLP
+# Korean documentation for flairNLP
 flairNLP를 한국어로 이해하기 쉽게 번역한 튜토리얼 번역본입니다.    
 
 

From a8748fad6fae6225e650fa171c4d1a7d3020f103 Mon Sep 17 00:00:00 2001
From: Taehyuny <31824443+Taehyuny@users.noreply.github.com>
Date: Sat, 13 Nov 2021 13:41:56 +0900
Subject: [PATCH 13/30] Update TUTORIAL_1_BASICS.md

---
 resources/docs/KOR_docs/TUTORIAL_1_BASICS.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/resources/docs/KOR_docs/TUTORIAL_1_BASICS.md b/resources/docs/KOR_docs/TUTORIAL_1_BASICS.md
index da07b085d..88903c04b 100644
--- a/resources/docs/KOR_docs/TUTORIAL_1_BASICS.md
+++ b/resources/docs/KOR_docs/TUTORIAL_1_BASICS.md
@@ -265,4 +265,4 @@ for label in sentence.get_labels('topic'):
 
 지금까지 문장을 만들고 수동으로 라벨을 붙이는 방법에 대해 알아보았습니다.
 
-이제 [사전 교육된 모델](/docs/TUTORIAL_2_TAGGING.md)을 사용하여 텍스트에 태그를 지정하는 방법에 대해 알아보겠습니다.
+이제 [사전 교육된 모델](/KOR_docs)/TUTORIAL_2_TAGGING.md)을 사용하여 텍스트에 태그를 지정하는 방법에 대해 알아보겠습니다.

From f4053fc8cc4be663417836e470473609a9eb2acc Mon Sep 17 00:00:00 2001
From: Taehyuny <31824443+Taehyuny@users.noreply.github.com>
Date: Sat, 13 Nov 2021 13:42:35 +0900
Subject: [PATCH 14/30] Update TUTORIAL_1_BASICS.md

---
 resources/docs/KOR_docs/TUTORIAL_1_BASICS.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/resources/docs/KOR_docs/TUTORIAL_1_BASICS.md b/resources/docs/KOR_docs/TUTORIAL_1_BASICS.md
index 88903c04b..b21fb087d 100644
--- a/resources/docs/KOR_docs/TUTORIAL_1_BASICS.md
+++ b/resources/docs/KOR_docs/TUTORIAL_1_BASICS.md
@@ -265,4 +265,4 @@ for label in sentence.get_labels('topic'):
 
 지금까지 문장을 만들고 수동으로 라벨을 붙이는 방법에 대해 알아보았습니다.
 
-이제 [사전 교육된 모델](/KOR_docs)/TUTORIAL_2_TAGGING.md)을 사용하여 텍스트에 태그를 지정하는 방법에 대해 알아보겠습니다.
+이제 [사전 교육된 모델](/docs/KOR_docs)/TUTORIAL_2_TAGGING.md)을 사용하여 텍스트에 태그를 지정하는 방법에 대해 알아보겠습니다.

From 55472e925c8f29312fd083122ad714debe2c46e0 Mon Sep 17 00:00:00 2001
From: Taehyuny <31824443+Taehyuny@users.noreply.github.com>
Date: Sat, 13 Nov 2021 13:43:02 +0900
Subject: [PATCH 15/30] Update TUTORIAL_1_BASICS.md

---
 resources/docs/KOR_docs/TUTORIAL_1_BASICS.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/resources/docs/KOR_docs/TUTORIAL_1_BASICS.md b/resources/docs/KOR_docs/TUTORIAL_1_BASICS.md
index b21fb087d..8ec08808a 100644
--- a/resources/docs/KOR_docs/TUTORIAL_1_BASICS.md
+++ b/resources/docs/KOR_docs/TUTORIAL_1_BASICS.md
@@ -265,4 +265,4 @@ for label in sentence.get_labels('topic'):
 
 지금까지 문장을 만들고 수동으로 라벨을 붙이는 방법에 대해 알아보았습니다.
 
-이제 [사전 교육된 모델](/docs/KOR_docs)/TUTORIAL_2_TAGGING.md)을 사용하여 텍스트에 태그를 지정하는 방법에 대해 알아보겠습니다.
+이제 [사전 교육된 모델](/docs/KOR_docs/TUTORIAL_2_TAGGING.md)을 사용하여 텍스트에 태그를 지정하는 방법에 대해 알아보겠습니다.

From bf435129261ccd93492c501b938cec6fe78a8a22 Mon Sep 17 00:00:00 2001
From: Taehyuny <31824443+Taehyuny@users.noreply.github.com>
Date: Sat, 13 Nov 2021 13:43:47 +0900
Subject: [PATCH 16/30] Update TUTORIAL_1_BASICS.md

---
 resources/docs/KOR_docs/TUTORIAL_1_BASICS.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/resources/docs/KOR_docs/TUTORIAL_1_BASICS.md b/resources/docs/KOR_docs/TUTORIAL_1_BASICS.md
index 8ec08808a..2819b65e5 100644
--- a/resources/docs/KOR_docs/TUTORIAL_1_BASICS.md
+++ b/resources/docs/KOR_docs/TUTORIAL_1_BASICS.md
@@ -265,4 +265,4 @@ for label in sentence.get_labels('topic'):
 
 지금까지 문장을 만들고 수동으로 라벨을 붙이는 방법에 대해 알아보았습니다.
 
-이제 [사전 교육된 모델](/docs/KOR_docs/TUTORIAL_2_TAGGING.md)을 사용하여 텍스트에 태그를 지정하는 방법에 대해 알아보겠습니다.
+이제 [사전 교육된 모델](/resources/docs/KOR_docs/TUTORIAL_2_TAGGING.md)을 사용하여 텍스트에 태그를 지정하는 방법에 대해 알아보겠습니다.

From 8625300726632aab63148dbce65902d1b2fe5dc3 Mon Sep 17 00:00:00 2001
From: Taehyuny <31824443+Taehyuny@users.noreply.github.com>
Date: Sat, 13 Nov 2021 13:44:08 +0900
Subject: [PATCH 17/30] Update TUTORIAL_2_TAGGING.md

---
 resources/docs/KOR_docs/TUTORIAL_2_TAGGING.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/resources/docs/KOR_docs/TUTORIAL_2_TAGGING.md b/resources/docs/KOR_docs/TUTORIAL_2_TAGGING.md
index 44d9274a3..15b4d9eaf 100644
--- a/resources/docs/KOR_docs/TUTORIAL_2_TAGGING.md
+++ b/resources/docs/KOR_docs/TUTORIAL_2_TAGGING.md
@@ -362,4 +362,4 @@ TARS는 임의 클래스에 대해 텍스트 분류를 수행할 수 있습니
 
 ## 다음
 
-이제 텍스트를 포함하기 위해 다른 [워드 임베딩](/리소스/docs/TUTORIAL_3_WORD_EMBeding.md)을 사용하는 방법에 대해 알아보겠습니다.
+이제 텍스트를 포함하기 위해 다른 [워드 임베딩](/resources/docs/KOR_docs/TUTORIAL_3_WORD_EMBeding.md)을 사용하는 방법에 대해 알아보겠습니다.

From 48cc7693cc99e4fd19e6f37375c05f8da2db633e Mon Sep 17 00:00:00 2001
From: Taehyuny <31824443+Taehyuny@users.noreply.github.com>
Date: Sat, 13 Nov 2021 13:44:46 +0900
Subject: [PATCH 18/30] Update TUTORIAL_2_TAGGING.md

---
 resources/docs/KOR_docs/TUTORIAL_2_TAGGING.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/resources/docs/KOR_docs/TUTORIAL_2_TAGGING.md b/resources/docs/KOR_docs/TUTORIAL_2_TAGGING.md
index 15b4d9eaf..43e7e2dee 100644
--- a/resources/docs/KOR_docs/TUTORIAL_2_TAGGING.md
+++ b/resources/docs/KOR_docs/TUTORIAL_2_TAGGING.md
@@ -362,4 +362,4 @@ TARS는 임의 클래스에 대해 텍스트 분류를 수행할 수 있습니
 
 ## 다음
 
-이제 텍스트를 포함하기 위해 다른 [워드 임베딩](/resources/docs/KOR_docs/TUTORIAL_3_WORD_EMBeding.md)을 사용하는 방법에 대해 알아보겠습니다.
+이제 텍스트를 포함하기 위해 다른 [워드 임베딩](/resources/docs/KOR_docs/TUTORIAL_3_WORD_EMBEDDING.md)을 사용하는 방법에 대해 알아보겠습니다.

From 30dc58447e93123c67c6510400c636861632e9fe Mon Sep 17 00:00:00 2001
From: Taehyuny <31824443+Taehyuny@users.noreply.github.com>
Date: Sat, 13 Nov 2021 13:46:25 +0900
Subject: [PATCH 19/30] Update TUTORIAL_3_WORD_EMBEDDING.md

---
 resources/docs/KOR_docs/TUTORIAL_3_WORD_EMBEDDING.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/resources/docs/KOR_docs/TUTORIAL_3_WORD_EMBEDDING.md b/resources/docs/KOR_docs/TUTORIAL_3_WORD_EMBEDDING.md
index af49ca953..d857ea9b7 100644
--- a/resources/docs/KOR_docs/TUTORIAL_3_WORD_EMBEDDING.md
+++ b/resources/docs/KOR_docs/TUTORIAL_3_WORD_EMBEDDING.md
@@ -11,7 +11,7 @@
 우리의 방법으로 생산된 모든 임베딩은 PyTorch 벡터이기 때문에 즉시 훈련에 사용될 수 있고 미세 조정이 가능합니다.
 
 이 튜토리얼에서는 몇 가지 일반적인 임베딩을 소개하고 사용 방법을 보여줍니다. 
-이러한 임베딩에 대한 자세한 내용과 지원되는 모든 임베딩에 대한 개요는 [여기](/docs/TUTORIAL_4_ELMO_BERT_FLAIR_EMbeding.md)를 참조하세요.
+이러한 임베딩에 대한 자세한 내용과 지원되는 모든 임베딩에 대한 개요는 [여기](resources/docs/KOR_docs/TUTORIAL_4_ELMO_BERT_FLAIR_EMBEDDING.md)를 참조하세요.
 
 ## 클래식 워드 임베딩
 
@@ -150,4 +150,4 @@ for token in sentence:
 
 ## Next 
 이러한 임베딩에 대한 자세한 내용과 지원되는 모든 단어 임베딩에 대한 전체 개요를 보려면 다음을 참조하십시오.
-[튜토리얼](/리소스/docs/튜토리얼_4_ELMO_BERT_FLAIR_EMBeding.md). 
+[튜토리얼](resources/docs/KOR_docs/TUTORIAL_4_ELMO_BERT_FLAIR_EMBEDDING.md). 

From 040a393f18618a6fbe1737369476d777f5f40e0d Mon Sep 17 00:00:00 2001
From: Taehyuny <31824443+Taehyuny@users.noreply.github.com>
Date: Sat, 13 Nov 2021 13:46:51 +0900
Subject: [PATCH 20/30] Update TUTORIAL_3_WORD_EMBEDDING.md

---
 resources/docs/KOR_docs/TUTORIAL_3_WORD_EMBEDDING.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/resources/docs/KOR_docs/TUTORIAL_3_WORD_EMBEDDING.md b/resources/docs/KOR_docs/TUTORIAL_3_WORD_EMBEDDING.md
index d857ea9b7..46a41d43e 100644
--- a/resources/docs/KOR_docs/TUTORIAL_3_WORD_EMBEDDING.md
+++ b/resources/docs/KOR_docs/TUTORIAL_3_WORD_EMBEDDING.md
@@ -11,7 +11,7 @@
 우리의 방법으로 생산된 모든 임베딩은 PyTorch 벡터이기 때문에 즉시 훈련에 사용될 수 있고 미세 조정이 가능합니다.
 
 이 튜토리얼에서는 몇 가지 일반적인 임베딩을 소개하고 사용 방법을 보여줍니다. 
-이러한 임베딩에 대한 자세한 내용과 지원되는 모든 임베딩에 대한 개요는 [여기](resources/docs/KOR_docs/TUTORIAL_4_ELMO_BERT_FLAIR_EMBEDDING.md)를 참조하세요.
+이러한 임베딩에 대한 자세한 내용과 지원되는 모든 임베딩에 대한 개요는 [여기](/resources/docs/KOR_docs/TUTORIAL_4_ELMO_BERT_FLAIR_EMBEDDING.md)를 참조하세요.
 
 ## 클래식 워드 임베딩
 
@@ -150,4 +150,4 @@ for token in sentence:
 
 ## Next 
 이러한 임베딩에 대한 자세한 내용과 지원되는 모든 단어 임베딩에 대한 전체 개요를 보려면 다음을 참조하십시오.
-[튜토리얼](resources/docs/KOR_docs/TUTORIAL_4_ELMO_BERT_FLAIR_EMBEDDING.md). 
+[튜토리얼](/resources/docs/KOR_docs/TUTORIAL_4_ELMO_BERT_FLAIR_EMBEDDING.md). 

From cb43182d8276657662e7ed74a2e1e20a0669f653 Mon Sep 17 00:00:00 2001
From: Taehyuny <31824443+Taehyuny@users.noreply.github.com>
Date: Sat, 13 Nov 2021 13:48:02 +0900
Subject: [PATCH 21/30] Update TUTORIAL_4_ELMO_BERT_FLAIR_EMBEDDING.md

---
 .../docs/KOR_docs/TUTORIAL_4_ELMO_BERT_FLAIR_EMBEDDING.md     | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/resources/docs/KOR_docs/TUTORIAL_4_ELMO_BERT_FLAIR_EMBEDDING.md b/resources/docs/KOR_docs/TUTORIAL_4_ELMO_BERT_FLAIR_EMBEDDING.md
index 48dd4ee27..71db82d51 100644
--- a/resources/docs/KOR_docs/TUTORIAL_4_ELMO_BERT_FLAIR_EMBEDDING.md
+++ b/resources/docs/KOR_docs/TUTORIAL_4_ELMO_BERT_FLAIR_EMBEDDING.md
@@ -1,5 +1,5 @@
 # 튜토리얼 4 : Word Embedding의 종류
-이번 챕터는 튜토리얼이라기보다 Flair에서 지원하는 Embedding의 종류를 소개합니다. 아래 테이블의 Embedding을 클릭해 사용법을 볼 수 있습니다. 설명들은 [base types](/resources/docs/TUTORIAL_1_BASICS.md)과 [standard word embeddings](/resources/docs/TUTORIAL_3_WORD_EMBEDDING.md), 그리고 `StackedEmbeddings`클래스에 익숙하다는 전제로 작성되어 있습니다.
+이번 챕터는 튜토리얼이라기보다 Flair에서 지원하는 Embedding의 종류를 소개합니다. 아래 테이블의 Embedding을 클릭해 사용법을 볼 수 있습니다. 설명들은 [base types](/resources/docs/KOR_docs/TUTORIAL_1_BASICS.md)과 [standard word embeddings](/resources/docs/KOR_docs/TUTORIAL_3_WORD_EMBEDDING.md), 그리고 `StackedEmbeddings`클래스에 익숙하다는 전제로 작성되어 있습니다.
 
 ## 개요
 모든 word embedding 클래스들은 `TokenEmbeddings` 클래스를 상속하고 있으며 텍스트를 임베드 하기 위해 `embed()` 메소드를 호출합니다. Flair를 사용하는 대부분의 경우 다양하고 복잡한 embedding 과정이 인터페이스 뒤로 숨겨져 있습니다. 사용자는 단순히 필요한 embedding 클래스를 인스턴스화하고 `embed()`를 호출해 텍스트를 임베드 하면 됩니다.
@@ -61,4 +61,4 @@ for token in sentence:
 단어들은 세 가지 다른 임베딩이 조합된 것으로 임베드 되었습니다. output은 여전히 PyTorch 벡터입니다.
 
 ## 다음 튜토리얼
-텍스트 분류와 같은 작업을 위해 전체 텍스트 [문서를 임베드](/resources/docs/TUTORIAL_5_DOCUMENT_EMBEDDINGS.md)하는 튜토리얼 혹은 [나만의 모델을 훈련](/resources/docs/TUTORIAL_7_TRAINING_A_MODEL.md)하기 위한 전제조건인 [말뭉치(corpus)를 로드](/resources/docs/TUTORIAL_6_CORPUS.md)하는 튜토리얼이 준비되어 있습니다.
+텍스트 분류와 같은 작업을 위해 전체 텍스트 [문서를 임베드](/resources/docs/KOR_docs/TUTORIAL_5_DOCUMENT_EMBEDDINGS.md)하는 튜토리얼 혹은 [나만의 모델을 훈련](/resources/docs/KOR_docs/TUTORIAL_7_TRAINING_A_MODEL.md)하기 위한 전제조건인 [말뭉치(corpus)를 로드](/resources/docs/KOR_docs/TUTORIAL_6_CORPUS.md)하는 튜토리얼이 준비되어 있습니다.

From e75f8aeb4b94bd2e3b4142ee8ad6a6baf133ae4c Mon Sep 17 00:00:00 2001
From: Taehyuny <31824443+Taehyuny@users.noreply.github.com>
Date: Sat, 13 Nov 2021 13:48:50 +0900
Subject: [PATCH 22/30] Update TUTORIAL_5_DOCUMENT_EMBEDDINGS.md

---
 resources/docs/KOR_docs/TUTORIAL_5_DOCUMENT_EMBEDDINGS.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/resources/docs/KOR_docs/TUTORIAL_5_DOCUMENT_EMBEDDINGS.md b/resources/docs/KOR_docs/TUTORIAL_5_DOCUMENT_EMBEDDINGS.md
index 5c214aa6c..740f548b4 100644
--- a/resources/docs/KOR_docs/TUTORIAL_5_DOCUMENT_EMBEDDINGS.md
+++ b/resources/docs/KOR_docs/TUTORIAL_5_DOCUMENT_EMBEDDINGS.md
@@ -1,7 +1,7 @@
 # 튜토리얼 5: 문서 임베딩
-우리가 앞서 살펴본 [단어 임베딩](/resources/docs/TUTORIAL_3_WORD_EMBEDDING.md)은 개별 단어에 대한 임베딩을 제공했습니다. 이번에 살펴볼 문서 임베딩은 전체 텍스트에 대해 하나의 임베딩을 제공합니다.
+우리가 앞서 살펴본 [단어 임베딩](/resources/docs/KOR_docs/TUTORIAL_3_WORD_EMBEDDING.md)은 개별 단어에 대한 임베딩을 제공했습니다. 이번에 살펴볼 문서 임베딩은 전체 텍스트에 대해 하나의 임베딩을 제공합니다.
 
-이번 튜토리얼은 여러분이 라이브러리의 [기본 유형](/resources/docs/TUTORIAL_1_BASICS.md)과 [단어 임베딩](/resources/docs/TUTORIAL_3_WORD_EMBEDDING.md)이 어떻게 동작하는지 익숙하다 가정하고 진행됩니다.
+이번 튜토리얼은 여러분이 라이브러리의 [기본 유형](/resources/docs/KOR_docs/TUTORIAL_1_BASICS.md)과 [단어 임베딩](/resources/docs/KOR_docs/TUTORIAL_3_WORD_EMBEDDING.md)이 어떻게 동작하는지 익숙하다 가정하고 진행됩니다.
 
 ## 임베딩
 모든 문서 임베딩 클래스들은 `DocumentEmbeddings` 클래스를 상속하며 텍스트를 임베드 하기 위해 `embed()` 메소드를 호출합니다.
@@ -129,4 +129,4 @@ embedding.embed(sentence)
 `pip install sentence-transformers`. 
 
 ## 다음 튜토리얼
-[나만의 모델을 훈련](/resources/docs/TUTORIAL_7_TRAINING_A_MODEL.md)하기 위한 전제조건인 [말뭉치(corpus)를 로드](/resources/docs/TUTORIAL_6_CORPUS.md)하는 튜토리얼이 준비되어 있습니다.
+[나만의 모델을 훈련](/resources/docs/KOR_docs/TUTORIAL_7_TRAINING_A_MODEL.md)하기 위한 전제조건인 [말뭉치(corpus)를 로드](/resources/docs/KOR_docs/TUTORIAL_6_CORPUS.md)하는 튜토리얼이 준비되어 있습니다.

From 76e1553a58484a5590d30d99f26e2e38d0f895b8 Mon Sep 17 00:00:00 2001
From: Taehyuny <31824443+Taehyuny@users.noreply.github.com>
Date: Sat, 13 Nov 2021 13:49:48 +0900
Subject: [PATCH 23/30] Update TUTORIAL_5_DOCUMENT_EMBEDDINGS.md

---
 resources/docs/KOR_docs/TUTORIAL_5_DOCUMENT_EMBEDDINGS.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/resources/docs/KOR_docs/TUTORIAL_5_DOCUMENT_EMBEDDINGS.md b/resources/docs/KOR_docs/TUTORIAL_5_DOCUMENT_EMBEDDINGS.md
index 740f548b4..c0d1e31ab 100644
--- a/resources/docs/KOR_docs/TUTORIAL_5_DOCUMENT_EMBEDDINGS.md
+++ b/resources/docs/KOR_docs/TUTORIAL_5_DOCUMENT_EMBEDDINGS.md
@@ -75,7 +75,7 @@ print(sentence.get_embedding())
 **주의** RNN 임베딩을 초기화하면 RNN 가중치가 무작위로 초기화됩니다! 사용을 위해서 사용자의 의도에 알맞게 훈련돼야 합니다.
 
 ## TransformerDocumentEmbeddings
-이미 훈련된 [변환기](https://github.com/huggingface/transformers를 통해 전체 문장을 임베딩합니다. 임베딩의 식별자를 통해 다른 변환기를 사용할 수 있습니다.
+이미 훈련된 [변환기](https://github.com/huggingface/transformers)를 통해 전체 문장을 임베딩합니다. 임베딩의 식별자를 통해 다른 변환기를 사용할 수 있습니다.
 
 표준 BERT 변환 모델의 예시입니다:
 ```python

From a9d8422878df165b6fe19a222f63cd6ab31b37c6 Mon Sep 17 00:00:00 2001
From: Taehyuny <31824443+Taehyuny@users.noreply.github.com>
Date: Sat, 13 Nov 2021 13:52:58 +0900
Subject: [PATCH 24/30] Update TUTORIAL_6_CORPUS.md

---
 resources/docs/KOR_docs/TUTORIAL_6_CORPUS.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/resources/docs/KOR_docs/TUTORIAL_6_CORPUS.md b/resources/docs/KOR_docs/TUTORIAL_6_CORPUS.md
index cd510015f..31727a2e7 100644
--- a/resources/docs/KOR_docs/TUTORIAL_6_CORPUS.md
+++ b/resources/docs/KOR_docs/TUTORIAL_6_CORPUS.md
@@ -1,6 +1,6 @@
 # 튜토리얼 6 : 훈련 데이터 불러오기
 이번 튜토리얼은 모델을 훈련하기 위해 말뭉치(corpus)를 로드하는 내용을 다룹니다. 
-이번 튜토리얼은 여러분이 라이브러리의 [기본 유형](/resources/docs/TUTORIAL_1_BASICS.md)에 익숙하다 가정하고 진행됩니다.
+이번 튜토리얼은 여러분이 라이브러리의 [기본 유형](/resources/docs/KOR_docs/TUTORIAL_1_BASICS.md)에 익숙하다 가정하고 진행됩니다.
 
 ## 말뭉치 오브젝트
 `corpus`는 모델을 훈련하는데 사용되는 데이터 세트입니다. 이는 모델 훈련 중 훈련, 검증 및 테스트 분할에 사용되는 문장들, 개발을 위한 문장 목록 및 테스트 문장 목록으로 구성됩니다.
@@ -483,4 +483,4 @@ corpus: Corpus = ClassificationCorpus(data_folder,
 `FastText` 형식은 열이 없기 때문에 주석의 이름을 직접 정의해야 합니다. 위 예제는 `label_type='topic'`인 말뭉치를 로드하고 있음을 나타냅니다.
 
 ## 다음 튜토리얼
-이제 [나만의 모델을 훈련](/resources/docs/TUTORIAL_7_TRAINING_A_MODEL.md)을 알아보겠습니다.
+이제 [나만의 모델을 훈련](/resources/docs/KOR_docs/TUTORIAL_7_TRAINING_A_MODEL.md)하는 방법을 알아보겠습니다.

From 4232f0360d64c5c9961fc5febb08a59f8f56e983 Mon Sep 17 00:00:00 2001
From: Taehyuny <31824443+Taehyuny@users.noreply.github.com>
Date: Sat, 13 Nov 2021 13:53:46 +0900
Subject: [PATCH 25/30] Update TUTORIAL_7_TRAINING_A_MODEL.md

---
 .../docs/KOR_docs/TUTORIAL_7_TRAINING_A_MODEL.md   | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/resources/docs/KOR_docs/TUTORIAL_7_TRAINING_A_MODEL.md b/resources/docs/KOR_docs/TUTORIAL_7_TRAINING_A_MODEL.md
index 8e89bd524..b05716357 100644
--- a/resources/docs/KOR_docs/TUTORIAL_7_TRAINING_A_MODEL.md
+++ b/resources/docs/KOR_docs/TUTORIAL_7_TRAINING_A_MODEL.md
@@ -4,10 +4,10 @@
 훈련하는 방법을 살펴볼 것입니다.
 
 이 튜토리얼을 학습하기 전에, 다음의 항목들을 이미 알고있다고 가정할 것입니다.
-* Base types: [TUTORIAL_1_BASICS](/docs/TUTORIAL_1_BASICS.md)
-* Word embeddings: [TUTORIAL_3_WORD_EMBEDDING](/docs/TUTORIAL_3_WORD_EMBEDDING.md)
-* Flair embeddings: [TUTORIAL_4_ELMO_BERT_FLAIR_EMBEDDING](/docs/TUTORIAL_4_ELMO_BERT_FLAIR_EMBEDDING.md)
-* Load a corpus: [TUTORIAL_6_CORPUS](/docs/TUTORIAL_6_CORPUS.md)
+* Base types: [TUTORIAL_1_BASICS](/resources/docs/KOR_docs/TUTORIAL_1_BASICS.md)
+* Word embeddings: [TUTORIAL_3_WORD_EMBEDDING](/resources/docs/KOR_docs/TUTORIAL_3_WORD_EMBEDDING.md)
+* Flair embeddings: [TUTORIAL_4_ELMO_BERT_FLAIR_EMBEDDING](/resources/docs/KOR_docs/TUTORIAL_4_ELMO_BERT_FLAIR_EMBEDDING.md)
+* Load a corpus: [TUTORIAL_6_CORPUS](/resources/docs/KOR_docs/TUTORIAL_6_CORPUS.md)
 
 
 
@@ -78,7 +78,7 @@ print(sentence.to_tagged_string())
 
 
 NER에 대한 시퀀스 레이블링 모델을 훈련하려면 위의 스크립트를 약간만 수정하면 됩니다.   
-CONLL_03(데이터를 수동으로 다운로드하거나 [different NER corpus](/docs/TUTORIAL_6_CORPUS.md#datasets-included-in-flair) 사용)과 같은 NER corpus를 로드하고,  
+CONLL_03(데이터를 수동으로 다운로드하거나 [different NER corpus](/resources/docs/KOR_docs/TUTORIAL_6_CORPUS.md#datasets-included-in-flair) 사용)과 같은 NER corpus를 로드하고,  
 `label_type'을 'ner'로 변경한 후, GloVe 및 Flair로 구성된 'StackedEmbedding'을 사용하세요:
 
 ```python
@@ -377,7 +377,7 @@ Flair의 많은 임베딩은 런타임 측면에서 생성하는 데 다소 비
 ## Next
 
 훈련 데이터가 없거나 아주 적은 경우 TARS 접근 방식이 가장 적합할 수 있습니다.   
-[TUTORIAL_10_TRAINING_ZERO_SHOT_MODEL](/docs/TUTORIAL_10_TRAINING_ZERO_SHOT_MODEL.md): Few-shot and zero-shot classification에 대한 TARS 튜토리얼을 확인하세요.
+[TUTORIAL_10_TRAINING_ZERO_SHOT_MODEL](/resources/docs/KOR_docs/TUTORIAL_10_TRAINING_ZERO_SHOT_MODEL.md): Few-shot and zero-shot classification에 대한 TARS 튜토리얼을 확인하세요.
 
 또는   
-[TUTORIAL_9_TRAINING_LM_EMBEDDINGS](/docs/TUTORIAL_9_TRAINING_LM_EMBEDDINGS.md): Training your own embeddings을 살펴보세요.
+[TUTORIAL_9_TRAINING_LM_EMBEDDINGS](/resources/docs/KOR_docs/TUTORIAL_9_TRAINING_LM_EMBEDDINGS.md): Training your own embeddings을 살펴보세요.

From 794aa2266212525d6fd823965ddbe4eec789d14f Mon Sep 17 00:00:00 2001
From: Taehyuny <31824443+Taehyuny@users.noreply.github.com>
Date: Sat, 13 Nov 2021 13:54:34 +0900
Subject: [PATCH 26/30] Update TUTORIAL_8_MODEL_OPTIMIZATION.md

---
 resources/docs/KOR_docs/TUTORIAL_8_MODEL_OPTIMIZATION.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/resources/docs/KOR_docs/TUTORIAL_8_MODEL_OPTIMIZATION.md b/resources/docs/KOR_docs/TUTORIAL_8_MODEL_OPTIMIZATION.md
index 962cca443..d3ebcfe0b 100644
--- a/resources/docs/KOR_docs/TUTORIAL_8_MODEL_OPTIMIZATION.md
+++ b/resources/docs/KOR_docs/TUTORIAL_8_MODEL_OPTIMIZATION.md
@@ -8,7 +8,7 @@ Flair에는 잘 알려진 하이퍼 매개변수 선택 도구인 [hyperopt](htt
 
 먼저 말뭉치를 로드해야 합니다. 다음 예에서 사용된 [AGNews corpus](https://www.di.unipi.it/~gulli/AG_corpus_of_news_articles.html)를 로드하려면 
 먼저 다운로드하여 올바른 형식으로 변환하세요.   
-자세한 내용은 [tutorial 6](/docs/TUTORIAL_6_CORPUS.md)을 확인하세요.
+자세한 내용은 [tutorial 6](/resources/docs/KOR_docs/TUTORIAL_6_CORPUS.md)을 확인하세요.
 
 ```python
 from flair.datasets import TREC_6
@@ -143,4 +143,4 @@ trainer.train(
 
 ## Next
 
-다음 튜토리얼에서는 [training your own embeddings](/docs/TUTORIAL_9_TRAINING_LM_EMBEDDINGS.md)에 대해 살펴볼 것입니다,
+다음 튜토리얼에서는 [training your own embeddings](/resources/docs/KOR_docs/TUTORIAL_9_TRAINING_LM_EMBEDDINGS.md)에 대해 살펴볼 것입니다.

From 82d21ad7500f3f2aab3b397bf5031dafe94e1896 Mon Sep 17 00:00:00 2001
From: Taehyuny <31824443+Taehyuny@users.noreply.github.com>
Date: Sat, 13 Nov 2021 13:59:25 +0900
Subject: [PATCH 27/30] Update README.md

---
 resources/docs/KOR_docs/README.md | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/resources/docs/KOR_docs/README.md b/resources/docs/KOR_docs/README.md
index ada918cef..70f944efe 100644
--- a/resources/docs/KOR_docs/README.md
+++ b/resources/docs/KOR_docs/README.md
@@ -99,15 +99,15 @@ Span [3]: "Berlin"   [− Labels: LOC (0.9992)]
 
 라이브러리를 시작하는 데 도움이 되는 빠른 튜토리얼 세트를 제공합니다.
 
-* [Tutorial 1: Basics](/docs/TUTORIAL_1_BASICS.md)
-* [Tutorial 2: Tagging your Text](/docs/TUTORIAL_2_TAGGING.md)
-* [Tutorial 3: Embedding Words](/docs/TUTORIAL_3_WORD_EMBEDDING.md)
-* [Tutorial 4: List of All Word Embeddings](/docs/TUTORIAL_4_ELMO_BERT_FLAIR_EMBEDDING.md)
-* [Tutorial 5: Embedding Documents](/docs/TUTORIAL_5_DOCUMENT_EMBEDDINGS.md)
-* [Tutorial 6: Loading a Dataset](/docs/TUTORIAL_6_CORPUS.md)
-* [Tutorial 7: Training a Model](/docs/TUTORIAL_7_TRAINING_A_MODEL.md)
-* [Tutorial 8: Training your own Flair Embeddings](/docs/TUTORIAL_9_TRAINING_LM_EMBEDDINGS.md)
-* [Tutorial 9: Training a Zero Shot Text Classifier (TARS)](/docs/TUTORIAL_10_TRAINING_ZERO_SHOT_MODEL.md)
+* [Tutorial 1: Basics](/resources/docs/KOR_docs/TUTORIAL_1_BASICS.md)
+* [Tutorial 2: Tagging your Text](/resources/docs/KOR_docs/TUTORIAL_2_TAGGING.md)
+* [Tutorial 3: Embedding Words](/resources/docs/KOR_docs/TUTORIAL_3_WORD_EMBEDDING.md)
+* [Tutorial 4: List of All Word Embeddings](/resources/docs/KOR_docs/TUTORIAL_4_ELMO_BERT_FLAIR_EMBEDDING.md)
+* [Tutorial 5: Embedding Documents](/resources/docs/KOR_docs/TUTORIAL_5_DOCUMENT_EMBEDDINGS.md)
+* [Tutorial 6: Loading a Dataset](/resources/docs/KOR_docs/TUTORIAL_6_CORPUS.md)
+* [Tutorial 7: Training a Model](/resources/docs/KOR_docs/TUTORIAL_7_TRAINING_A_MODEL.md)
+* [Tutorial 8: Training your own Flair Embeddings](/resources/docs/KOR_docs/TUTORIAL_9_TRAINING_LM_EMBEDDINGS.md)
+* [Tutorial 9: Training a Zero Shot Text Classifier (TARS)](/resources/docs/KOR_docs/TUTORIAL_10_TRAINING_ZERO_SHOT_MODEL.md)
 
 튜토리얼에서는 기본 NLP 클래스가 작동하는 방법, 사전 훈련된 모델을 로드하여 텍스트에 태그를 지정하는 방법, 다른 단어 또는 문서 임베딩으로 텍스트를 포함하는 방법, 고유한 언어 모델, 시퀀스 레이블링 모델 및 텍스트 분류 모델에 대해 설명하고있습니다. 불분명한 것이 있으면 알려주세요.
 

From e010d617bc65fac14c781059f7ebb38133dbca7b Mon Sep 17 00:00:00 2001
From: Taehyuny <31824443+Taehyuny@users.noreply.github.com>
Date: Sat, 13 Nov 2021 14:00:48 +0900
Subject: [PATCH 28/30] Update README.md

---
 resources/docs/KOR_docs/README.md | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/resources/docs/KOR_docs/README.md b/resources/docs/KOR_docs/README.md
index 70f944efe..a04a844ca 100644
--- a/resources/docs/KOR_docs/README.md
+++ b/resources/docs/KOR_docs/README.md
@@ -106,8 +106,9 @@ Span [3]: "Berlin"   [− Labels: LOC (0.9992)]
 * [Tutorial 5: Embedding Documents](/resources/docs/KOR_docs/TUTORIAL_5_DOCUMENT_EMBEDDINGS.md)
 * [Tutorial 6: Loading a Dataset](/resources/docs/KOR_docs/TUTORIAL_6_CORPUS.md)
 * [Tutorial 7: Training a Model](/resources/docs/KOR_docs/TUTORIAL_7_TRAINING_A_MODEL.md)
-* [Tutorial 8: Training your own Flair Embeddings](/resources/docs/KOR_docs/TUTORIAL_9_TRAINING_LM_EMBEDDINGS.md)
-* [Tutorial 9: Training a Zero Shot Text Classifier (TARS)](/resources/docs/KOR_docs/TUTORIAL_10_TRAINING_ZERO_SHOT_MODEL.md)
+* [Tutorial 8: Training your own Flair Embeddings](/resources/docs/KOR_docs/TUTORIAL_8_TRAINING_LM_EMBEDDINGS.md)
+* [Tutorial 9: Training your own Flair Embeddings](/resources/docs/KOR_docs/TUTORIAL_9_TRAINING_LM_EMBEDDINGS.md)
+* [Tutorial 10: Training a Zero Shot Text Classifier (TARS)](/resources/docs/KOR_docs/TUTORIAL_10_TRAINING_ZERO_SHOT_MODEL.md)
 
 튜토리얼에서는 기본 NLP 클래스가 작동하는 방법, 사전 훈련된 모델을 로드하여 텍스트에 태그를 지정하는 방법, 다른 단어 또는 문서 임베딩으로 텍스트를 포함하는 방법, 고유한 언어 모델, 시퀀스 레이블링 모델 및 텍스트 분류 모델에 대해 설명하고있습니다. 불분명한 것이 있으면 알려주세요.
 

From 5b086a8f899b87500dd45f3f1c025e5295a3f34f Mon Sep 17 00:00:00 2001
From: Taehyuny <31824443+Taehyuny@users.noreply.github.com>
Date: Sat, 13 Nov 2021 14:01:04 +0900
Subject: [PATCH 29/30] Update README.md

---
 resources/docs/KOR_docs/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/resources/docs/KOR_docs/README.md b/resources/docs/KOR_docs/README.md
index a04a844ca..2c0d2bbc9 100644
--- a/resources/docs/KOR_docs/README.md
+++ b/resources/docs/KOR_docs/README.md
@@ -106,7 +106,7 @@ Span [3]: "Berlin"   [− Labels: LOC (0.9992)]
 * [Tutorial 5: Embedding Documents](/resources/docs/KOR_docs/TUTORIAL_5_DOCUMENT_EMBEDDINGS.md)
 * [Tutorial 6: Loading a Dataset](/resources/docs/KOR_docs/TUTORIAL_6_CORPUS.md)
 * [Tutorial 7: Training a Model](/resources/docs/KOR_docs/TUTORIAL_7_TRAINING_A_MODEL.md)
-* [Tutorial 8: Training your own Flair Embeddings](/resources/docs/KOR_docs/TUTORIAL_8_TRAINING_LM_EMBEDDINGS.md)
+* [Tutorial 8: Training your own Flair Embeddings](/resources/docs/KOR_docs/TUTORIAL_8_MODEL_OPTIMIZATION.md)
 * [Tutorial 9: Training your own Flair Embeddings](/resources/docs/KOR_docs/TUTORIAL_9_TRAINING_LM_EMBEDDINGS.md)
 * [Tutorial 10: Training a Zero Shot Text Classifier (TARS)](/resources/docs/KOR_docs/TUTORIAL_10_TRAINING_ZERO_SHOT_MODEL.md)
 

From 7945cbaa903aecad17ffd127f5077381eacbdf0a Mon Sep 17 00:00:00 2001
From: Taehyuny <31824443+Taehyuny@users.noreply.github.com>
Date: Sat, 13 Nov 2021 14:02:51 +0900
Subject: [PATCH 30/30] Update README.md

---
 resources/docs/KOR_docs/README.md | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/resources/docs/KOR_docs/README.md b/resources/docs/KOR_docs/README.md
index 2c0d2bbc9..e1d970538 100644
--- a/resources/docs/KOR_docs/README.md
+++ b/resources/docs/KOR_docs/README.md
@@ -107,8 +107,7 @@ Span [3]: "Berlin"   [− Labels: LOC (0.9992)]
 * [Tutorial 6: Loading a Dataset](/resources/docs/KOR_docs/TUTORIAL_6_CORPUS.md)
 * [Tutorial 7: Training a Model](/resources/docs/KOR_docs/TUTORIAL_7_TRAINING_A_MODEL.md)
 * [Tutorial 8: Training your own Flair Embeddings](/resources/docs/KOR_docs/TUTORIAL_8_MODEL_OPTIMIZATION.md)
-* [Tutorial 9: Training your own Flair Embeddings](/resources/docs/KOR_docs/TUTORIAL_9_TRAINING_LM_EMBEDDINGS.md)
-* [Tutorial 10: Training a Zero Shot Text Classifier (TARS)](/resources/docs/KOR_docs/TUTORIAL_10_TRAINING_ZERO_SHOT_MODEL.md)
+* [Tutorial 9: Training a Zero Shot Text Classifier (TARS)](/resources/docs/KOR_docs/TUTORIAL_9_TRAINING_LM_EMBEDDINGS.md)
 
 튜토리얼에서는 기본 NLP 클래스가 작동하는 방법, 사전 훈련된 모델을 로드하여 텍스트에 태그를 지정하는 방법, 다른 단어 또는 문서 임베딩으로 텍스트를 포함하는 방법, 고유한 언어 모델, 시퀀스 레이블링 모델 및 텍스트 분류 모델에 대해 설명하고있습니다. 불분명한 것이 있으면 알려주세요.