interscript · AhMohsen46 · Oct 29, 2021
diff --git a/maps/iso-ara-Arab-Latn-223-1-2021.imp b/maps/iso-ara-Arab-Latn-223-1-2021.imp
@@ -0,0 +1,320 @@
+metadata {
+  authority_id: iso
+  id: 233-1-2021
+  language: iso-639-2:ara
+  source_script: Arab
+  destination_script: Latn
+  name: ISO 233:1984 Documentation -- Transliteration of Arabic characters into Latin characters
+  url:
+    - https://www.iso.org/standard/4117.html
+    - http://transliteration.eki.ee/pdf/Arabic_2.2.pdf
+    - http://www.eki.ee/wgrs/rom1_ar.pdf
+  creation_date: 1984
+  confirmation_date: 2018-06
+  description: |
+    This part of ISO 233 is one of a series of International Standards, dealing
+    with the conversion of systems of writing. The aim of this part of ISO 233 and
+    others in the series is to provide a means for international communication of
+    written messages in a form which permits the automatic transmission and
+    reconstitution of these, by men or machines. The system of conversion, in this
+    case, must be univocal and entirely reversible to allow for retransliteration.
+    This means that consideration to phonetic and aesthetic matters or to certain
+    national customs is not a priority: all these considerations are, indeed,
+    ignored by the machine performing the function. This part of ISO 233 may be
+    used by anyone who has a clear understanding of the system and is certain that
+    it can be applied without ambiguity. The result obtained will not give a
+    correct pronunciation of the original text in a person’s own language, but it
+    will serve as a means of finding automatically the original graphism and thus
+    allow anyone who has knowledge of the original language to pronounce it
+    correctly. Similarly, one can only pronounce correctly a text written in, for
+    example, English or Polish, if one has a knowledge of English or Polish. The
+    adoption of this part of ISO 233 for international communication leaves every
+    country free to adopt for its own use a national standard which may be
+    different, on condition that it is compatible with this part of ISO 233. The
+    system proposed herein should make this possible and be acceptable to
+    international use if the graphisms it creates are such that they may be
+    converted automatically into the graphisms used in any strict national systems.
+    The adoption of national standards compatible with this part of ISO 233 will
+    permit the representation, in an international publication, of the morphemes of
+    each language according to the customs of the country where it is spoken. It
+    will be possible to simplify this representation in order to take into account
+    the number of the character sets available on different kinds of machines.
+  notes:
+    - |
+      The transliteration ISO 233:1984 WRT ara-arab-latn-2017 gives every character and diacritical mark a unique
+      equivalent and e.g. long vowels in Arabic ā, ī and ū are consequently written a’, iy and uw
+      respectively in the ISO transliteration. Other main correspondences
+      ث is ṯ instead of th
+      ج is ǧ instead of j
+      ح is ḥ instead of ẖ
+      خ is ẖ instead of kh
+      ذ is ḏ instead of dh
+      ش is š instead of sh
+      ص is ṣ instead of s̱
+      ض is ḍ instead of ḏ
+      ط is ṭ instead of ṯ
+      ظ is ẓ instead of d͟h
+      غ is ġ instead of gh
+      ة is ẗ instead of h/t
+      ى is ỳ
+      ـِي is iy instead of ī
+      ـُو is uw instead of ū
+      ـَا is a’ instead of ā
+      ـَى is aỳ instead of á
+}
+
+tests {
+  test "مِصر", "Miṣr"
+  test "قَطَر", "Qaṭar"
+  test "الجُمهُورِيَّة العِرَاقِيَّة", "Al Ǧumhuwriyaẗ al ‘Ira’qiyaẗ"
+  test "جُمهُورِيَّة مِصر العَرَبِيَّة", "Ǧumhuwriyaẗ Miṣr al ‘Arabiyaẗ"
+  test "الرِيَاض", "Ar Riya’ḍ"
+  test "الشارِقة", "Aš Šâriqaẗ"
+}
+
+stage {
+
+  # CHARACTERS
+  parallel {
+
+
+
+
+    # See note B
+    sub boundary + "\u0627\u0644", "al " # ال
+    # '\uFE8E' : ''  # ﺎ
+
+
+
+    sub "\u0627", "’" # ا
+    sub "\u0622", "’â" # آ
+    sub "\u0621", maybe("`") # ء# see note A
+
+    sub "\u0623", "'" # أ
+    sub "\u0624", "'" # ؤ
+    sub "\u0626", "’" # ئ
+
+    sub "\u0628", "b" # ب
+    sub "\uFE91", "b" # ﺑ
+    sub "\uFE92", "b" # ﺒ
+    sub "\uFE90", "b" # ﺐ
+
+    # See note C
+    sub "\u062a", "t" # ت
+    sub "\ufe97", "t" # ﺗ
+    sub "\ufe98", "t" # ﺘ
+    sub "\ufe96", "t" # ﺖ
+
+    sub "\u062b", "ṯ" # ث
+    sub "\ufe9b", "ṯ" # ﺛ
+    sub "\ufe9c", "ṯ" # ﺜ
+    sub "\ufe9a", "ṯ" # ﺚ
+
+    sub "\u062c", "ǧ" # ج
+    sub "\ufe9f", "ǧ" # ﺟ
+    sub "\ufea0", "ǧ" # ﺠ
+    sub "\ufe9e", "ǧ" # ﺞ
+
+    sub "\u062d", "ḥ" # ح
+    sub "\ufea3", "ḥ" # ﺣ
+    sub "\ufea4", "ḥ" # ﺤ
+    sub "\ufea2", "ḥ" # ﺢ
+
+    sub "\u062e", "ẖ" # خ
+    sub "\ufea7", "ẖ" # ﺧ
+    sub "\ufea8", "ẖ" # ﺨ
+    sub "\ufea6", "ẖ" # ﺦ
+
+    sub "\u062f", "d" # د
+    sub "\ufeaa", "d" # ﺪ
+
+    sub "\u0630", "ḏ" # ذ
+    sub "\ufeac", "ḏ" # ﺬ
+
+    sub "\u0631", "r" # ر
+    sub "\ufeae", "r" # ﺮ
+
+    sub "\u0632", "z" # ز
+    sub "\ufeb0", "z" # ﺰ
+
+    sub "\u0633", "s" # س
+    sub "\ufeb3", "s" # ﺳ
+    sub "\ufeb4", "s" # ﺴ
+    sub "\ufeb2", "s" # ﺲ
+
+    sub "\u0634", "š" # ش
+    sub "\ufeb7", "š" # ﺷ
+    sub "\ufeb8", "š" # ﺸ
+    sub "\ufeb6", "š" # ﺶ
+
+    sub "\u0635", "ṣ" # ص
+    sub "\ufebb", "ṣ" # ﺻ
+    sub "\ufebc", "ṣ" # ﺼ
+    sub "\ufeba", "ṣ" # ﺺ
+
+    sub "\u0636", "ḍ" # ض
+    sub "\ufebf", "ḍ" # ﺿ
+    sub "\ufec0", "ḍ" # ﻀ
+    sub "\ufebe", "ḍ" # ﺾ
+
+    sub "\u0637", "ṭ" # ط
+    sub "\ufec3", "ṭ" # ﻃ
+    sub "\ufec4", "ṭ" # ﻄ
+    sub "\ufec2", "ṭ" # ﻂ
+
+    sub "\u0638", "ẓ" # ظ
+    sub "\ufec7", "ẓ" # ﻇ
+    sub "\ufec8", "ẓ" # ﻈ
+    sub "\ufec6", "ẓ" # ﻆ
+
+    sub "\u0639", "‘" # ع
+    sub "\ufecb", "‘" # ﻋ
+    sub "\ufecc", "‘" # ﻌ
+    sub "\ufeca", "‘" # ﻊ
+
+    sub "\u063a", "ġ" # غ
+    sub "\ufecf", "ġ" # ﻏ
+    sub "\ufed0", "ġ" # ﻐ
+    sub "\ufece", "ġ" # ﻎ
+
+    sub "\u0641", "f" # ف
+    sub "\ufed3", "f" # ﻓ
+    sub "\ufed4", "f" # ﻔ
+    sub "\ufed2", "f" # ﻒ
+
+    sub "\u0642", "q" # ق
+    sub "\ufed7", "q" # ﻗ
+    sub "\ufed8", "q" # ﻘ
+    sub "\ufed6", "q" # ﻖ
+
+    sub "\u0643", "k" # ك
+    sub "\ufedb", "k" # ﻛ
+    sub "\ufedc", "k" # ﻜ
+    sub "\ufeda", "k" # ﻚ
+
+    sub "\u0644", "l" # ل
+    sub "\ufedf", "l" # ﻟ
+    sub "\ufee0", "l" # ﻠ
+    sub "\ufede", "l" # ﻞ
+
+    sub "\u0645", "m" # م
+    sub "\ufee3", "m" # ﻣ
+    sub "\ufee4", "m" # ﻤ
+    sub "\ufee2", "m" # ﻢ
+
+    sub "\u0646", "n" # ن
+    sub "\ufee7", "n" # ﻧ
+    sub "\ufee8", "n" # ﻨ
+    sub "\ufee6", "n" # ﻦ
+
+    # See note C
+    sub "\u0647", "h" # ه
+    sub "\ufeeb", "h" # ﻫ
+    sub "\ufeec", "h" # ﻬ
+    sub "\ufeea", "h" # ﻪ
+
+    sub "\u0648", "w" # و
+    sub "\ufeee", "w" # ﻮ
+
+    sub "\u064a", "y" # ي
+    sub "\ufef3", "y" # ﻳ
+    sub "\ufef4", "y" # ﻴ
+    sub "\ufef1", "y" # ﻱ
+
+    sub "\u0649", "ỳ" # ى
+
+    # pointing
+    sub "\u064e", "a" # َ fatha
+    sub "\u064e", "", after: "\u0629" # َ fatha followed by ta' marboota
+    sub "\u064e\u0627", "a’" # ـَا fatha followed by ا
+    sub "\u064b", "á" #  ً
+    sub "\u064b\u0627", "á’" #  ـًا
+    sub "\u0670", "ā" # '
+    sub "\u064e\u0649", "aỳ" # ـَى fatha followed by ى which is ا not ي
+    sub "\u0649\u0670", "āỳ" # ى'
+    sub "\u0649\u0670", "áỳ" # ىً
+
+    sub "\u064f", "u" # ُ damma
+    sub "\u064f\u0648", "uw" # ـُو damma followed by و
+    sub "\u064c", "ú" #  ٌ
+    sub "\u064e\u0648\u0652", "aw°" # ـَوْ
+
+    sub "\u0650", "i" # ِ kasra
+    sub "\u0650\u064a", "iy" # ـِي kasra followed by ي
+    sub "\u064d", "í" #  ٍ
+    sub "\u064e\u064a\u0652", "ay°" # ـَيْ
+    sub "\u0650\u064a\u0651\u064e", "iy" # ـِيَّ
+
+    sub "\u0650\u064a", "iy", after: any("\u064e\u064f") # ـِي kasra followed by ي
+    sub "\u0652", "°" # ْ sokoon, see note A below
+    sub "\u0651", "̄" # ّ shadda
+    sub "\u0671", "ˆ" # ‫ٱ‬ hamzat wasl
+
+
+    sub "\u060C", "," # ،
+    sub "\u061B", ";" # ‫؛‬
+    sub "\u061F", "?" # ؟
+
+    sub "\u0660", "0"
+    sub "\u0661", "1"
+    sub "\u0662", "2"
+    sub "\u0663", "3"
+    sub "\u0664", "4"
+    sub "\u0665", "5"
+    sub "\u0666", "6"
+    sub "\u0667", "7"
+    sub "\u0668", "8"
+    sub "\u0669", "9"
+
+
+    # special pointed letters
+    sub "\u0639\u064e", "‘a" # عَ
+    sub "\u0639\u0650", "‘i" # عِ
+    sub "\u0639\u064f", "‘ū" # عُ
+    # handle MacOS regex difference
+    sub "\u0639\u064f\u0648", "‘ū" # عُو damma followed by و
+
+
+    # Sun letters
+
+    sub boundary + "\u0627\u0644\u062a" + maybe("\u0651"), "at t" # الت
+    sub boundary + "\u0627\u0644\u062b" + maybe("\u0651"), "aṯ ṯ" # الث
+    sub boundary + "\u0627\u0644\u062f" + maybe("\u0651"), "ad d" # الد
+    sub boundary + "\u0627\u0644\u0630" + maybe("\u0651"), "aḏ ḏ" # الذ
+    sub boundary + "\u0627\u0644\u0631" + maybe("\u0651"), "ar r" # الر
+    sub boundary + "\u0627\u0644\u0632" + maybe("\u0651"), "az z" # الز
+    sub boundary + "\u0627\u0644\u0633" + maybe("\u0651"), "as s" # الس
+    sub boundary + "\u0627\u0644\u0634" + maybe("\u0651"), "aš š" # الش
+    sub boundary + "\u0627\u0644\u0635" + maybe("\u0651"), "aṣ ṣ" # الص
+    sub boundary + "\u0627\u0644\u0636" + maybe("\u0651"), "aḍ ḍ" # الض
+    sub boundary + "\u0627\u0644\u0637" + maybe("\u0651"), "aṭ ṭ" # الط
+    sub boundary + "\u0627\u0644\u0638" + maybe("\u0651"), "aẓ ẓ" # الظ
+    sub boundary + "\u0627\u0644\u0644" + maybe("\u0651"), "al l" # الل
+    sub boundary + "\u0627\u0644\u0646" + maybe("\u0651"), "an n" # الن
+
+    # ta' marboota in iso-233-1984 is all the same `aẗ`
+    sub "\u0629", "aẗ" # ة in the middle of the sentence
+
+
+  }
+
+  # POSTRULES
+  sub any("\u0061".."\uFFFF"), upcase, before: boundary, not_before: boundary + any("‘’'")
+  # don't capitalize defined article in the middle of a sentence
+  sub " At T", " at T" # الت
+  sub " Aṯ Ṯ", " aṯ Ṯ" # الث
+  sub " Ad D", " ad D" # الد
+  sub " Aḏ Ḏ", " aḏ Ḏ" # الذ
+  sub " Ar R", " ar R" # الر
+  sub " Az Z", " az Z" # الز
+  sub " As S", " as S" # الس
+  sub " Aš Š", " aš Š" # الش
+  sub " Aṣ Ṣ", " aṣ Ṣ" # الص
+  sub " Aḍ Ḍ", " aḍ Ḍ" # الض
+  sub " Aṭ Ṭ", " aṭ Ṭ" # الط
+  sub " Aẓ Ẓ", " aẓ Ẓ" # الظ
+  sub " Al L", " al L" # الل
+  sub " An N", " an N" # الن
+  sub " Al ", " al " # ال
+
+}