diff --git a/.clang-tidy b/.clang-tidy new file mode 100644 index 0000000..c83c3a6 --- /dev/null +++ b/.clang-tidy @@ -0,0 +1,111 @@ +--- +# Configure clang-tidy for this project. + +# Here is an explanation for why some of the checks are disabled: +# +# -google-readability-namespace-comments: the *_CLIENT_NS is a macro, and +# clang-tidy fails to match it against the initial value. +# +# -modernize-use-trailing-return-type: clang-tidy recommends using +# `auto Foo() -> std::string { return ...; }`, we think the code is less +# readable in this form. +# +# -modernize-return-braced-init-list: We think removing typenames and using +# only braced-init can hurt readability. +# +# -modernize-avoid-c-arrays: We only use C arrays when they seem to be the +# right tool for the job, such as `char foo[] = "hello"`. In these cases, +# avoiding C arrays often makes the code less readable, and std::array is +# not a drop-in replacement because it doesn't deduce the size. +# +# -performance-move-const-arg: This warning requires the developer to +# know/care more about the implementation details of types/functions than +# should be necessary. For example, `A a; F(std::move(a));` will trigger a +# warning IFF `A` is a trivial type (and therefore the move is +# meaningless). It would also warn if `F` accepts by `const&`, which is +# another detail that the caller need not care about. +# +# -readability-redundant-declaration: A friend declaration inside a class +# counts as a declaration, so if we also declare that friend outside the +# class in order to document it as part of the public API, that will +# trigger a redundant declaration warning from this check. +# +# -readability-function-cognitive-complexity: too many false positives with +# clang-tidy-12. We need to disable this check in macros, and that setting +# only appears in clang-tidy-13. +# +# -bugprone-narrowing-conversions: too many false positives around +# `std::size_t` vs. `*::difference_type`. 
+# +# -bugprone-easily-swappable-parameters: too many false positives. +# +# -bugprone-implicit-widening-of-multiplication-result: too many false positives. +# Almost any expression of the form `2 * variable` or `long x = a_int * b_int;` +# generates an error. +# +# -bugprone-unchecked-optional-access: too many false positives in tests. +# Despite what the documentation says, this warning appears after +# `ASSERT_TRUE(variable)` or `ASSERT_TRUE(variable.has_value())`. +# +Checks: > + -*, + bugprone-*, + google-*, + misc-*, + modernize-*, + performance-*, + portability-*, + readability-*, + -google-readability-braces-around-statements, + -google-readability-namespace-comments, + -google-runtime-references, + -misc-non-private-member-variables-in-classes, + -misc-const-correctness, + -modernize-return-braced-init-list, + -modernize-use-trailing-return-type, + -modernize-use-nodiscard, + -modernize-avoid-c-arrays, + -performance-move-const-arg, + -readability-braces-around-statements, + -readability-identifier-length, + -readability-named-parameter, + -readability-redundant-declaration, + -readability-function-cognitive-complexity, + -bugprone-narrowing-conversions, + -bugprone-easily-swappable-parameters, + -bugprone-implicit-widening-of-multiplication-result, + -bugprone-exception-escape, + -bugprone-unchecked-optional-access, + -portability-simd-intrinsics + +# Turn all the warnings from the checks above into errors. 
+WarningsAsErrors: "*" + +HeaderFilterRegex: "(google/cloud/|generator/).*\\.h$" + +CheckOptions: + - { key: readability-identifier-naming.NamespaceCase, value: lower_case } + - { key: readability-identifier-naming.ClassCase, value: CamelCase } + - { key: readability-identifier-naming.StructCase, value: CamelCase } + - { key: readability-identifier-naming.TemplateParameterCase, value: CamelCase } + - { key: readability-identifier-naming.FunctionCase, value: lower_case } + - { key: readability-identifier-naming.VariableCase, value: lower_case } + - { key: readability-identifier-naming.ClassMemberCase, value: lower_case } + - { key: readability-identifier-naming.ClassMethodCase, value: lower_case } + - { key: readability-identifier-naming.ClassMemberSuffix, value: _ } + - { key: readability-identifier-naming.PrivateMemberSuffix, value: _ } + - { key: readability-identifier-naming.ProtectedMemberSuffix, value: _ } + - { key: readability-identifier-naming.EnumConstantCase, value: CamelCase } + - { key: readability-identifier-naming.ConstexprVariableCase, value: CamelCase } + - { key: readability-identifier-naming.ConstexprVariablePrefix, value: k } + - { key: readability-identifier-naming.GlobalConstantCase, value: CamelCase } + - { key: readability-identifier-naming.GlobalConstantPrefix, value: k } + - { key: readability-identifier-naming.MemberCase, value: lower_case } + - { key: readability-identifier-naming.MemberConstantCase, value: CamelCase } + - { key: readability-identifier-naming.MemberConstantPrefix, value: k } + - { key: readability-identifier-naming.StaticConstantCase, value: CamelCase } + - { key: readability-identifier-naming.StaticConstantPrefix, value: k } + - { key: readability-implicit-bool-conversion.AllowIntegerConditions, value: 1 } + - { key: readability-implicit-bool-conversion.AllowPointerConditions, value: 1 } + - { key: readability-function-cognitive-complexity.IgnoreMacros, value: 1 } + - { key: 
readability-identifier-naming.TemplateParameterIgnoredRegexp, value: 'expr-type'} diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml new file mode 100644 index 0000000..acc712b --- /dev/null +++ b/.github/workflows/main.yml @@ -0,0 +1,111 @@ +name: "main" +on: + push: + branches: + - main + pull_request: + branches: + - '**' +env: + ccache_basedir: ${{ github.workspace }} + ccache_dir: "${{ github.workspace }}/.ccache" + ccache_compilercheck: content + ccache_compress: 'true' + ccache_compresslevel: 9 + ccache_maxsize: 200M + ccache_cmake: -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache + +jobs: + main: + name: "build-test" + runs-on: "ubuntu-latest" + steps: + - name: Checkout + uses: actions/checkout@v2 + with: + submodules: recursive + - name: Install Dependencies + run: |- + sudo apt-get update + sudo apt-get install -y \ + ccache \ + libibus-1.0-dev \ + libyaml-cpp-dev \ + + - name: Install slimt + run: |- + sudo apt-get install -y libopenblas-dev libsentencepiece-dev + + # libxsimd-dev fails due to some issue. 
+ git clone https://github.com/xtensor-stack/xsimd --branch 11.1.0 --depth 1 + cmake -B xsimd/build -S xsimd + cmake --build xsimd/build --target all + sudo cmake --build xsimd/build --target install + + git clone --recursive https://github.com/jerinphilip/slimt --single-branch --branch main + cmake -B slimt/build -S slimt -DEXPORT_CMAKE_FILE=ON -DUSE_BUILTIN_SENTENCEPIECE=OFF \ + -DWITH_GEMMOLOGY=ON -DUSE_AVX2=ON -DUSE_SSE2=ON -DUSE_SSSE3=ON -DUSE_AVX512=ON \ + -DWITH_INTGEMM=OFF \ + -DSLIMT_PACKAGE=ON + cmake --build slimt/build --target all + sudo cmake --build slimt/build --target install + + - name: Generate ccache_vars for ccache based on machine + shell: bash + id: ccache_vars + run: |- + echo "::set-output name=hash::$(echo ${{ env.ccache_compilercheck }})" + echo "::set-output name=timestamp::$(date '+%Y-%m-%dT%H.%M.%S')" + + - name: Cache-op for build-cache through ccache + uses: actions/cache@v2 + with: + path: ${{ env.ccache_dir }} + key: ccache-${{ matrix.identifier }}-${{ steps.ccache_vars.outputs.hash }}-${{ github.ref }}-${{ steps.ccache_vars.outputs.timestamp }} + restore-keys: |- + ccache-${{ matrix.identifier }}-${{ steps.ccache_vars.outputs.hash }}-${{ github.ref }} + ccache-${{ matrix.identifier }}-${{ steps.ccache_vars.outputs.hash }} + ccache-${{ matrix.identifier }} + - name: ccache environment setup + run: |- + echo "CCACHE_COMPILER_CHECK=${{ env.ccache_compilercheck }}" >> $GITHUB_ENV + echo "CCACHE_BASEDIR=${{ env.ccache_basedir }}" >> $GITHUB_ENV + echo "CCACHE_COMPRESS=${{ env.ccache_compress }}" >> $GITHUB_ENV + echo "CCACHE_COMPRESSLEVEL=${{ env.ccache_compresslevel }}" >> $GITHUB_ENV + echo "CCACHE_DIR=${{ env.ccache_dir }}" >> $GITHUB_ENV + echo "CCACHE_MAXSIZE=${{ env.ccache_maxsize }}" >> $GITHUB_ENV + - name: ccache prolog + run: |- + ccache -s # Print current cache stats + ccache -z # Zero cache entry + - name: cmake + run: |- + mkdir -p build + cd build + cmake -L .. 
-DCOMPILE_TESTS=on ${{ env.ccache_cmake }} + + - name: Build from source + working-directory: build + run: | + make -j2 + + - name: "Download models (slimt) and configure ibus-slimt-t8n" + run: |- + # Install slimt, which will manage models. + python3 -m pip install slimt -f https://github.com/jerinphilip/slimt/releases/expanded_assets/latest + + # Download models. + slimt download -m en-fr-tiny + slimt download -m fr-en-tiny + slimt download -m de-en-tiny + slimt download -m en-de-tiny + + python3 scripts/ibus-slimt-t8n-configure.py --default browsermt/en-de-tiny --verify + + - name: Test translator backend + run: |- + ./test fake < ${{ github.workspace }}/data/samples.txt + ./test real < ${{ github.workspace }}/data/samples.txt + + - name: ccache epilog + run: 'ccache -s # Print current cache stats' diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..1bf05d9 --- /dev/null +++ b/.gitignore @@ -0,0 +1,18 @@ +build/ +dist/ +.*sw[op] + +lemonade/ibus_config.h +*.egg-info +*.pyc +__pycache__ +env/ + +docs/_build +docs/make.bat + +ibus-slimt-t8n.fossil +*.marks + +.cache +.fslckout diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..91f139d --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,49 @@ +cmake_minimum_required(VERSION 3.5.1) +set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake) +include(CMakeDependentOption) + +if(POLICY CMP0074) + cmake_policy(SET CMP0074 NEW) # CMake 3.12 +endif() +project(ibus-slimt-t8n CXX C) + +set(CMAKE_CXX_STANDARD 20) +set(CMAKE_CXX_STANDARD_REQUIRED ON) + +include(GetVersionFromFile) +message(STATUS "Project name: ${PROJECT_NAME}") +message(STATUS "Project version: ${PROJECT_VERSION_STRING_FULL}") + +set(AUTHOR "Jerin Philip ") +set(AUTHOR_XML_ESCAPED "Jerin Philip <jerinphilip@live.in>") +set(PROJECT_SHORTNAME "ibus-slimt-t8n") +set(PROJECT_LONGNAME "slimt-t8n") +set(PROJECT_DESCRIPTION + "slimt-t8n provides client-side translation on the local *nix machine.") +set(PROJECT_VERSION 
${PROJECT_VERSION_STRING}) +set(PROJECT_HOMEPAGE "https://github.com/jerinphilip/ibus-slimt-t8n") +set(PROJECT_LICENSE "GPL") + +find_package(yaml-cpp REQUIRED) +find_package(slimt REQUIRED) + +find_package(PkgConfig) +pkg_check_modules(GLIB2 REQUIRED glib-2.0) +pkg_check_modules(IBUS REQUIRED ibus-1.0) + +if(NOT TARGET yaml-cpp::yaml-cpp) + add_library(yaml-cpp::yaml-cpp ALIAS yaml-cpp) +endif() + +set(SLIMT_T8N_PRIVATE_LIBS slimt::slimt-shared yaml-cpp::yaml-cpp + ${GLIB2_LIBRARIES} ${IBUS_LIBRARIES}) + +include(GNUInstallDirs) + +add_subdirectory(ibus-slimt-t8n) + +install(TARGETS ibus-slimt-t8n RUNTIME DESTINATION ${CMAKE_INSTALL_LIBEXECDIR}) +install(FILES ${CMAKE_BINARY_DIR}/slimt-t8n.xml + DESTINATION /usr/share/ibus/component) +install(FILES ${CMAKE_SOURCE_DIR}/assets/bergamot.png + DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/icons) diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 0000000..d159169 --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,339 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. 
Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. 
+ + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. 
You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. 
+ +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. 
However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. 
+You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + <signature of Ty Coon>, 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. 
diff --git a/README.md b/README.md new file mode 100644 index 0000000..4468fe3 --- /dev/null +++ b/README.md @@ -0,0 +1,98 @@ +# ibus-slimt-t8n + +[![License: GPL v2](https://img.shields.io/badge/License-GPL%20v2-blue.svg)](https://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html) + +Repurposes [Intelligent Input Method +Bus](https://en.wikipedia.org/wiki/Intelligent_Input_Bus) (iBus) to intercept +text entered into a field by a user, and insert translated text in its place +into any graphical application. Hence, this allows you as a user to enter text +in a language you know, while the field gets the translated text. Useful when +interacting with websites or agents in a foreign language. + +See the software in action below: + +iBus translation in action + +The functionality will work in any GUI application which requests +keyboard-input - browser, text-editors, mail-clients, chat-clients etc. + +## Setup + +Setup requires the following: + + +* Compiler supporting C++20 or higher. +* [slimt](https://github.com/jerinphilip/slimt), which provides a + commodity machine (most CPUs) inference engine for the neural models + powering machine translation. +* ibus and GLIB 2.0 libraries (for GTK and iBus) +* yaml-cpp + +**Installing Dependencies** On ArchLinux, dependencies can be installed by: + +```bash +pacman -S ibus glib2 +pacman -S yaml-cpp +``` + +For [slimt](https://github.com/jerinphilip/slimt), follow instructions in the +repository. Package management (models) is done by _slimt_, and +`ibus-slimt-t8n` is downstream to slimt. The command-line that also manages +packages can be installed via PyPI and can operate standalone. + +```bash +python3 -m pip install slimt +slimt ls +slimt download +``` + +**Building from source** Once things are in place, run the following steps to +build and install `ibus-slimt-t8n` and associated files required for +integration into ibus. + +``` +# Configure. +cmake -B build -S . 
+ +# Build +cmake --build build --target all + +# Install. +sudo cmake --build build --target install +# Install the project... +# -- Install configuration: "Release" +# -- Up-to-date: /usr/local/libexec/ibus-slimt-t8n +# -- Up-to-date: /usr/share/ibus/component/slimt-t8n.xml +# -- Up-to-date: /usr/local/share/icons/bergamot.png +``` + +**First run** For first time install: + +1. Restart GNOME (in case the entry does not appear among available input methods). +2. Restart `ibus-daemon` (`ibus-daemon -rXv`) so the ibus parent updates to be + aware of `slimt-t8n.xml` and is capable of spawning the ibus engine + (`ibus-slimt-t8n`). + +From time to time, you may configure `ibus-slimt-t8n` using the script supplied +([`scripts/ibus-slimt-t8n-configure.py`](./scripts/ibus-slimt-t8n-configure.py), +requires python and [`slimt`](https://pypi.org/project/slimt/) python package), +installed previously. + +```bash +python3 scripts/ibus-slimt-t8n-configure.py \ + --default browsermt/en-de-tiny \ + --verify +``` + +The generated configuration is located at `$HOME/.config/ibus-slimt-t8n.yml` +which can be edited by hand to add your own models, as long as the YAML remains +valid. + +**Related Projects** + +* [bergamot-translator](https://github.com/browsermt/bergamot-translator) +* [translateLocally](https://github.com/XapaJIaMnu/translateLocally) +* [slimt](https://github.com/jerinphilip/slimt) +* [lemonade](https://github.com/jerinphilip/lemonade) (This repository is a + rebrand after reducing dependencies and using slimt). + diff --git a/assets/bergamot.png b/assets/bergamot.png new file mode 100644 index 0000000..62b5711 Binary files /dev/null and b/assets/bergamot.png differ diff --git a/cmake/GetVersionFromFile.cmake b/cmake/GetVersionFromFile.cmake new file mode 100644 index 0000000..ce8bfaf --- /dev/null +++ b/cmake/GetVersionFromFile.cmake @@ -0,0 +1,60 @@ +## +# This CMake module sets the project version from a version file. 
+# +# The module sets the following variables: +# +# * PROJECT_VERSION_STRING +# * PROJECT_VERSION_STRING_FULL +# * PROJECT_VERSION_MAJOR +# * PROJECT_VERSION_MINOR +# * PROJECT_VERSION_PATCH +# * PROJECT_VERSION_TWEAK +# * PROJECT_VERSION_GIT_SHA +# +# This module is public domain, use it as it fits you best. +## + +# Get full string version from file +if(PROJECT_VERSION_FILE) + file(STRINGS ${PROJECT_VERSION_FILE} PROJECT_VERSION_STRING) +else() + file(STRINGS ${CMAKE_CURRENT_SOURCE_DIR}/${PROJECT_NAME}.version PROJECT_VERSION_STRING) +endif() + +# Get current commit SHA from git +execute_process(COMMAND ${GIT_EXECUTABLE} rev-parse --short HEAD + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + OUTPUT_VARIABLE PROJECT_VERSION_GIT_SHA + OUTPUT_STRIP_TRAILING_WHITESPACE) + +# Get partial versions into a list +string(REGEX MATCHALL "-.*$|[0-9]+" PROJECT_PARTIAL_VERSION_LIST + ${PROJECT_VERSION_STRING}) + +# Set the version numbers +list(GET PROJECT_PARTIAL_VERSION_LIST 0 PROJECT_VERSION_MAJOR) +list(GET PROJECT_PARTIAL_VERSION_LIST 1 PROJECT_VERSION_MINOR) +list(GET PROJECT_PARTIAL_VERSION_LIST 2 PROJECT_VERSION_PATCH) + +# The tweak part is optional, so check if the list contains it +list(LENGTH PROJECT_PARTIAL_VERSION_LIST PROJECT_PARTIAL_VERSION_LIST_LEN) +if(PROJECT_PARTIAL_VERSION_LIST_LEN GREATER 3) + list(GET PROJECT_PARTIAL_VERSION_LIST 3 PROJECT_VERSION_TWEAK) + string(SUBSTRING ${PROJECT_VERSION_TWEAK} 1 -1 PROJECT_VERSION_TWEAK) +endif() + +# Unset the list +unset(PROJECT_PARTIAL_VERSION_LIST) + +# Set full project version string +set(PROJECT_VERSION_STRING_FULL + ${PROJECT_VERSION_STRING}+${PROJECT_VERSION_GIT_SHA}) + +# Print all variables for debugging +#message(STATUS ${PROJECT_VERSION_STRING_FULL}) +#message(STATUS ${PROJECT_VERSION_STRING}) +#message(STATUS ${PROJECT_VERSION_MAJOR}) +#message(STATUS ${PROJECT_VERSION_MINOR}) +#message(STATUS ${PROJECT_VERSION_PATCH}) +#message(STATUS ${PROJECT_VERSION_TWEAK}) +#message(STATUS 
${PROJECT_VERSION_GIT_SHA}) diff --git a/data/samples.txt b/data/samples.txt new file mode 100644 index 0000000..b747abb --- /dev/null +++ b/data/samples.txt @@ -0,0 +1,4 @@ +English French 1 2 3 4 5 6 +French English 1 2 3 4 5 6 +English German 1 2 3 4 5 6 +German English 1 2 3 4 5 6 diff --git a/data/slimt-t8n-config.yaml b/data/slimt-t8n-config.yaml new file mode 100644 index 0000000..80bbbed --- /dev/null +++ b/data/slimt-t8n-config.yaml @@ -0,0 +1,44 @@ +# Sample config for slimt-t8n +# +# We want iBus to be (hand) configurable by the user, and not reliant on the +# structure imposed by the package managing system. + +# Ideally authors like for these to be codes, but this is already set by +# translateLocally. + +languages: + - "German" + - "French" + - "English" + - "Czech" + - "Spanish" + +default: + - source: "English" + target: "German" + +verify: true + +# TODO(jerin): Spec and incorporate. +# preferred: +# - model: "en-de-tiny" +# source: "English" +# target: "German" +# +# - model: "en-de-tiny" +# source: "English" +# target: "German" + + +models: + - name: "en-de-tiny" + arch: "tiny" + direction: + source: "English" + target: "German" + root: ".local/share/slimt/models/browsermt/en-de-tiny" + model: "model.intgemm.alphas.bin" + vocabulary: + source: "vocab.ende.spm" + target: "vocab.ende.spm" + shortlist: "lex.s2t.bin" diff --git a/docs/ibus-development.md b/docs/ibus-development.md new file mode 100644 index 0000000..7d09f18 --- /dev/null +++ b/docs/ibus-development.md @@ -0,0 +1,66 @@ +# Development notes + +This page aggregates and organizes links and resources during the course of +development for the contributors' future reference. + +## ibus + +API documentation of ibus is available at: + +* [https://ibus.github.io/docs/ibus-1.5/index.html](https://ibus.github.io/docs/ibus-1.5/index.html) + +However, most of the ibus code here is based on the following C++ adaptation. 
+ +* [ibus-libzhuyin](https://github.com/epico/ibus-libzhuyin) + +The CMakeLists based install is adopted from +[libvarnam-ibus](https://github.com/varnamproject/libvarnam-ibus). + + +**Developing** If you are developing `ibus-slimt-t8n`, restarting `ibus-daemon` +suffices once the executable is overwritten by a more recent install. There +are some detailed setup-instructions with screenshots through the process +available [here](https://github.com/jerinphilip/lemonade/wiki/Setting-Up-iBus). + +Logs are printed to `stderr` (slimt) and glib-log (ibus). Replacing existing +ibus-daemon by spawning a new one using `ibus-daemon -rxv` will leave a process +open and printing to the console. + + +**Debugging** If you want to drop to debugging, keep an `ibus-daemon` open. +With this open, we can explicitly spawn the `ibus-slimt-t8n` via a debugger +(like GDB) so it registers on D-Bus and will be picked up by the open +`ibus-daemon` instead of spawning a new one. + +```bash +ibus-daemon -xrv +gdb --args /usr/local/libexec/ibus-slimt-t8n --ibus +``` + +If there are crashes (observable when toggling the engine on and the fallback +keyboard comes to play, it's a silent failure) - it helps pinpoint where the +issue is happening using an executable compiled with debug symbols (using +`-DCMAKE_BUILD_TYPE=Debug`). + +Keep in mind to update the path with where `ibus-slimt-t8n` is installed in +your Operating System adhering to conventions. + +**Useful Commands** The following commands are also useful in diagnosing +issues. + +You can command line verify if ibus has loaded the engine correctly by looking +for it as follows via `ibus` command-line interface. + +```bash +ibus read-cache | grep "slimt-t8n" +``` + +If there's output in the XML it means ibus integration is aware of +`ibus-slimt-t8n` engine. + +## Launching iBus + +* On the GNOME Desktop Environment, Go to **Settings > Language and Region**
+ +* Search for "slimt-t8n", add it among input sources. If you don't see this + entry, try logging out of GNOME and back in again. diff --git a/ibus-slimt-t8n.version b/ibus-slimt-t8n.version new file mode 100644 index 0000000..77d6f4c --- /dev/null +++ b/ibus-slimt-t8n.version @@ -0,0 +1 @@ +0.0.0 diff --git a/ibus-slimt-t8n/CMakeLists.txt b/ibus-slimt-t8n/CMakeLists.txt new file mode 100644 index 0000000..db3bee6 --- /dev/null +++ b/ibus-slimt-t8n/CMakeLists.txt @@ -0,0 +1,36 @@ +set(IBUS_BUS_NAME "org.freedesktop.IBus.slimt") +set(IBUS_ENGINE_NAME "slimt-t8n") +set(IBUS_ENGINE_EXECUTABLE_NAME "ibus-${IBUS_ENGINE_NAME}") +set(IBUS_COMMANDLINE + "${CMAKE_INSTALL_LIBEXECDIR}/${IBUS_ENGINE_EXECUTABLE_NAME}") +set(IBUS_TEXTDOMAIN "ibus-slimt") +set(IBUS_ICON "${CMAKE_INSTALL_FULL_DATAROOTDIR}/icons/bergamot.png") +set(IBUS_LAYOUT "us") +set(IBUS_LANGUAGE "en") + +set(IBUS_FULL_COMMANDLINE + "${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBEXECDIR}/${IBUS_ENGINE_EXECUTABLE_NAME} --ibus" +) + +configure_file("${CMAKE_CURRENT_SOURCE_DIR}/${IBUS_ENGINE_NAME}.xml.in" + "${CMAKE_BINARY_DIR}/${IBUS_ENGINE_NAME}.xml") + +configure_file("${CMAKE_CURRENT_SOURCE_DIR}/ibus_config.h.in" + "${CMAKE_CURRENT_BINARY_DIR}/ibus_config.h" @ONLY) + +add_library(slimt-t8n STATIC engine_compat.cpp slimt_engine.cpp translator.cpp + application.cpp) +target_link_libraries(slimt-t8n PUBLIC ${SLIMT_T8N_PRIVATE_LIBS}) + +target_include_directories( + slimt-t8n PUBLIC $ + $) + +target_include_directories(slimt-t8n PUBLIC ${GLIB2_INCLUDE_DIRS} + ${IBUS_INCLUDE_DIRS}) + +add_executable(${IBUS_ENGINE_EXECUTABLE_NAME} main.cpp) +target_link_libraries(${IBUS_ENGINE_EXECUTABLE_NAME} PUBLIC slimt-t8n) + +add_executable(test test.cpp) +target_link_libraries(test PUBLIC slimt-t8n) diff --git a/ibus-slimt-t8n/application.cpp b/ibus-slimt-t8n/application.cpp new file mode 100644 index 0000000..abcf565 --- /dev/null +++ b/ibus-slimt-t8n/application.cpp @@ -0,0 +1,77 @@ +#include "ibus-slimt-t8n/application.h" 
+#include "ibus-slimt-t8n/logging.h" + +namespace ibus::slimt::t8n { + +Application::Application(gboolean ibus) { + ibus_init(); + + // TODO(jerin): Bus can be g::Object derived. + bus_ = ibus_bus_new(); + + if (!ibus_bus_is_connected(bus_.get())) { + LOG("Cannot connect to ibus!"); + g_warning("Can not connect to ibus!"); + std::abort(); + } + + if (!ibus_bus_get_config(bus_.get())) { + LOG("IBus config component is not ready!"); + g_warning("IBus config component is not ready!"); + std::abort(); + } + + auto callback = +[](IBusBus *, gpointer) { ibus_quit(); }; + + g_signal_connect(bus_.get(), "disconnected", G_CALLBACK(callback), NULL); + + LOG("Adding factory"); + factory_ = ibus_factory_new(ibus_bus_get_connection(bus_.get())); + + ibus_factory_add_engine(factory_.get(), PROJECT_SHORTNAME, + IBUS_TYPE_SLIMT_T8N_ENGINE); + + if (ibus) { + LOG("ibus = true, requesting bus"); + ibus_bus_request_name(bus_.get(), IBUS_BUS_NAME, 0); + } else { + LOG("ibus = false, creating new bus"); + g::Holder component( // + ibus_component_new( // + IBUS_BUS_NAME, // + PROJECT_DESCRIPTION, // + PROJECT_VERSION, // + PROJECT_LICENSE, // + AUTHOR, // + PROJECT_HOMEPAGE, // + IBUS_COMPONENT_COMMANDLINE, // + IBUS_TEXTDOMAIN // + )); + + if (component.get()) { + LOG("creating component success"); + } + + g::Holder description( // + ibus_engine_desc_new( // + PROJECT_SHORTNAME, // + PROJECT_LONGNAME, // + PROJECT_DESCRIPTION, // + IBUS_LANGUAGE, // + PROJECT_LICENSE, // + AUTHOR, // + IBUS_ICON, // + IBUS_LAYOUT // + )); + + ibus_component_add_engine(component.get(), description.get()); + ibus_bus_register_component(bus_.get(), component.get()); + } +} + +void Application::run() { + LOG("Spawning ibus main"); + ibus_main(); + LOG("Ending ibus main"); +} +} // namespace ibus::slimt::t8n diff --git a/ibus-slimt-t8n/application.h b/ibus-slimt-t8n/application.h new file mode 100644 index 0000000..3718853 --- /dev/null +++ b/ibus-slimt-t8n/application.h @@ -0,0 +1,17 @@ +#pragma once 
+#include "ibus-slimt-t8n/engine_compat.h" +#include "ibus-slimt-t8n/ibus_config.h" +#include +#include + +namespace ibus::slimt::t8n { +class Application { +public: + explicit Application(gboolean ibus); + static void run(); + +private: + g::Holder bus_{nullptr}; + g::Holder factory_{nullptr}; +}; +} // namespace ibus::slimt::t8n diff --git a/ibus-slimt-t8n/engine_compat.cpp b/ibus-slimt-t8n/engine_compat.cpp new file mode 100644 index 0000000..ab0094a --- /dev/null +++ b/ibus-slimt-t8n/engine_compat.cpp @@ -0,0 +1,218 @@ +#include "ibus-slimt-t8n/engine_compat.h" +#include "ibus-slimt-t8n/slimt_engine.h" +#include + +namespace ibus::slimt::t8n { + +/* code of engine class of GObject */ +#define IBUS_SLIMT_T8N_ENGINE(obj) \ + (G_TYPE_CHECK_INSTANCE_CAST((obj), IBUS_TYPE_SLIMT_T8N_ENGINE, \ + IBusSlimtEngine)) +#define IBUS_SLIMT_T8N_ENGINE_CLASS(klass) \ + (G_TYPE_CHECK_CLASS_CAST((klass), IBUS_TYPE_SLIMT_T8N_ENGINE, \ + IBusSlimtEngineClass)) +#define IBUS_IS_SLIMT_T8N_ENGINE(obj) \ + (G_TYPE_CHECK_INSTANCE_TYPE((obj), IBUS_TYPE_SLIMT_T8N_ENGINE)) +#define IBUS_IS_SLIMT_T8N_ENGINE_CLASS(klass) \ + (G_TYPE_CHECK_CLASS_TYPE((klass), IBUS_TYPE_SLIMT_T8N_ENGINE)) +#define IBUS_SLIMT_T8N_ENGINE_GET_CLASS(obj) \ + (G_TYPE_INSTANCE_GET_CLASS((obj), IBUS_TYPE_SLIMT_T8N_ENGINE, \ + IBusSlimtEngineClass)) + +using IBusSlimtEngine = struct IBusSlimtEngine; +using IBusSlimtEngineClass = struct IBusSlimtEngineClass; + +struct IBusSlimtEngineClass { + IBusEngineClass parent; +}; + +struct IBusSlimtEngine { + IBusEngine parent; + + /* members */ + Engine *engine; +}; + +/* functions prototype */ +static void ibus_slimt_t8n_engine_class_init(IBusSlimtEngineClass *klass); +static void ibus_slimt_t8n_engine_init(IBusSlimtEngine *slimt_t8n); +static GObject * +ibus_slimt_t8n_engine_constructor(GType type, guint n_construct_params, + GObjectConstructParam *construct_params); + +static void ibus_slimt_t8n_engine_destroy(IBusSlimtEngine *slimt_t8n); +static gboolean 
ibus_slimt_t8n_engine_process_key_event(IBusEngine *engine, + guint keyval, + guint keycode, + guint modifiers); +static void ibus_slimt_t8n_engine_focus_in(IBusEngine *engine); +static void ibus_slimt_t8n_engine_focus_out(IBusEngine *engine); +#if IBUS_CHECK_VERSION(1, 5, 4) +static void ibus_slimt_t8n_engine_set_content_type(IBusEngine *engine, + guint purpose, guint hints); +#endif +static void ibus_slimt_t8n_engine_reset(IBusEngine *engine); +static void ibus_slimt_t8n_engine_enable(IBusEngine *engine); +static void ibus_slimt_t8n_engine_disable(IBusEngine *engine); + +#if 0 +static void ibus_engine_set_cursor_location (IBusEngine *engine, + gint x, + gint y, + gint w, + gint h); +static void ibus_slimt_t8n_engine_set_capabilities + (IBusEngine *engine, + guint caps); +#endif + +static void ibus_slimt_t8n_engine_page_up(IBusEngine *engine); +static void ibus_slimt_t8n_engine_page_down(IBusEngine *engine); +static void ibus_slimt_t8n_engine_cursor_up(IBusEngine *engine); +static void ibus_slimt_t8n_engine_cursor_down(IBusEngine *engine); +static void ibus_slimt_t8n_engine_property_activate(IBusEngine *engine, + const gchar *prop_name, + guint prop_state); +static void ibus_slimt_t8n_engine_candidate_clicked(IBusEngine *engine, + guint index, guint button, + guint state); +#if 0 +static void ibus_slimt_t8n_engine_property_show (IBusEngine *engine, + const gchar *prop_name); +static void ibus_slimt_t8n_engine_property_hide (IBusEngine *engine, + const gchar *prop_name); +#endif + +G_DEFINE_TYPE(IBusSlimtEngine, ibus_slimt_t8n_engine, IBUS_TYPE_ENGINE) + +static void ibus_slimt_t8n_engine_class_init(IBusSlimtEngineClass *klass) { + GObjectClass *object_class = G_OBJECT_CLASS(klass); + IBusObjectClass *ibus_object_class = IBUS_OBJECT_CLASS(klass); + IBusEngineClass *engine_class = IBUS_ENGINE_CLASS(klass); + + object_class->constructor = ibus_slimt_t8n_engine_constructor; + ibus_object_class->destroy = + reinterpret_cast(ibus_slimt_t8n_engine_destroy); + + 
engine_class->process_key_event = ibus_slimt_t8n_engine_process_key_event; + + engine_class->reset = ibus_slimt_t8n_engine_reset; + engine_class->enable = ibus_slimt_t8n_engine_enable; + engine_class->disable = ibus_slimt_t8n_engine_disable; + + engine_class->focus_in = ibus_slimt_t8n_engine_focus_in; + engine_class->focus_out = ibus_slimt_t8n_engine_focus_out; + +#if IBUS_CHECK_VERSION(1, 5, 4) + engine_class->set_content_type = ibus_slimt_t8n_engine_set_content_type; +#endif + + engine_class->page_up = ibus_slimt_t8n_engine_page_up; + engine_class->page_down = ibus_slimt_t8n_engine_page_down; + + engine_class->cursor_up = ibus_slimt_t8n_engine_cursor_up; + engine_class->cursor_down = ibus_slimt_t8n_engine_cursor_down; + + engine_class->property_activate = ibus_slimt_t8n_engine_property_activate; + + engine_class->candidate_clicked = ibus_slimt_t8n_engine_candidate_clicked; +} + +static void ibus_slimt_t8n_engine_init(IBusSlimtEngine *slimt_t8n) { + if (g_object_is_floating(slimt_t8n)) + g_object_ref_sink(slimt_t8n); // make engine sink +} + +static GObject * +ibus_slimt_t8n_engine_constructor(GType type, guint n_construct_params, + GObjectConstructParam *construct_params) { + IBusSlimtEngine *engine; + const gchar *name; + + engine = reinterpret_cast( + G_OBJECT_CLASS(ibus_slimt_t8n_engine_parent_class) + ->constructor(type, n_construct_params, construct_params)); + name = ibus_engine_get_name(reinterpret_cast(engine)); + engine->engine = new SlimtEngine(IBUS_ENGINE(engine)); + return reinterpret_cast(engine); +} + +static void ibus_slimt_t8n_engine_destroy(IBusSlimtEngine *slimt_t8n) { + delete slimt_t8n->engine; + (static_cast(ibus_slimt_t8n_engine_parent_class)) + ->destroy(reinterpret_cast(slimt_t8n)); +} + +static gboolean ibus_slimt_t8n_engine_process_key_event(IBusEngine *engine, + guint keyval, + guint keycode, + guint modifiers) { + auto *slimt_t8n = reinterpret_cast(engine); + return slimt_t8n->engine->process_key_event(keyval, keycode, modifiers); +} + 
+#if IBUS_CHECK_VERSION(1, 5, 4) +static void ibus_slimt_t8n_engine_set_content_type(IBusEngine *engine, + guint purpose, guint hints) { + auto *slimt_t8n = reinterpret_cast(engine); + return slimt_t8n->engine->set_content_type(purpose, hints); +} +#endif + +static void ibus_slimt_t8n_engine_property_activate(IBusEngine *engine, + const gchar *prop_name, + guint prop_state) { + auto *slimt_t8n = reinterpret_cast(engine); + slimt_t8n->engine->property_activate(prop_name, prop_state); +} +static void ibus_slimt_t8n_engine_candidate_clicked(IBusEngine *engine, + guint index, guint button, + guint state) { + auto *slimt_t8n = reinterpret_cast(engine); + slimt_t8n->engine->candidate_clicked(index, button, state); +} + +#define FUNCTION(name, Name) \ + static void ibus_slimt_t8n_engine_##name(IBusEngine *engine) { \ + IBusSlimtEngine *slimt_t8n = (IBusSlimtEngine *)engine; \ + slimt_t8n->engine->Name(); \ + ((IBusEngineClass *)ibus_slimt_t8n_engine_parent_class)->name(engine); \ + } +FUNCTION(focus_in, focus_in) +FUNCTION(focus_out, focus_out) +FUNCTION(reset, reset) +FUNCTION(enable, enable) +FUNCTION(disable, disable) +FUNCTION(page_up, page_up) +FUNCTION(page_down, page_down) +FUNCTION(cursor_up, cursor_up) +FUNCTION(cursor_down, cursor_down) +#undef FUNCTION + +Engine::Engine(IBusEngine *engine) : engine_holder_(engine), engine_(engine) { +#if IBUS_CHECK_VERSION(1, 5, 4) + m_input_purpose_ = IBUS_INPUT_PURPOSE_FREE_FORM; +#endif +} + +gboolean Engine::content_is_password() { +#if IBUS_CHECK_VERSION(1, 5, 4) + return static_cast(IBUS_INPUT_PURPOSE_PASSWORD == m_input_purpose_); +#else + return FALSE; +#endif +} + +void Engine::focus_out() { +#if IBUS_CHECK_VERSION(1, 5, 4) + m_input_purpose_ = IBUS_INPUT_PURPOSE_FREE_FORM; +#endif +} + +#if IBUS_CHECK_VERSION(1, 5, 4) +void Engine::set_content_type(guint purpose, guint /*hints*/) { + m_input_purpose_ = static_cast(purpose); +} +#endif + +} // namespace ibus::slimt::t8n diff --git a/ibus-slimt-t8n/engine_compat.h 
b/ibus-slimt-t8n/engine_compat.h new file mode 100644 index 0000000..1956653 --- /dev/null +++ b/ibus-slimt-t8n/engine_compat.h @@ -0,0 +1,94 @@ +#pragma once +#include + +#include "gtypes.h" + +namespace ibus::slimt::t8n { + +#define IBUS_TYPE_SLIMT_T8N_ENGINE (ibus_slimt_t8n_engine_get_type()) + +GType ibus_slimt_t8n_engine_get_type(); + +class Engine { +public: + explicit Engine(IBusEngine *engine); + virtual ~Engine() = default; + + gboolean content_is_password(); + + // virtual functions + virtual gboolean process_key_event(guint keyval, guint keycode, + guint modifiers) = 0; + virtual void focus_in() = 0; + virtual void focus_out(); +#if IBUS_CHECK_VERSION(1, 5, 4) + virtual void set_content_type(guint purpose, guint hints); +#endif + virtual void reset() = 0; + virtual void enable() = 0; + virtual void disable() = 0; + virtual void page_up() = 0; + virtual void page_down() = 0; + virtual void cursor_up() = 0; + virtual void cursor_down() = 0; + virtual gboolean property_activate(const gchar *prop_name, + guint prop_state) = 0; + virtual void candidate_clicked(guint index, guint button, guint state) = 0; + +protected: + void commit_text(const g::Text &text) const { + ibus_engine_commit_text(engine_, text.get()); + } + + void update_preedit_text(const g::Text &text, guint cursor, + gboolean visible) const { + ibus_engine_update_preedit_text(engine_, text.get(), cursor, visible); + } + + void show_preedit_text() const { ibus_engine_show_preedit_text(engine_); } + + void hide_preedit_text() const { ibus_engine_hide_preedit_text(engine_); } + + void update_auxiliary_text(const g::Text &text, gboolean visible) const { + ibus_engine_update_auxiliary_text(engine_, text.get(), visible); + } + + void show_auxiliary_text() const { ibus_engine_show_auxiliary_text(engine_); } + + void hide_auxiliary_text() const { ibus_engine_hide_auxiliary_text(engine_); } + + void update_lookup_table(const g::LookupTable &table, + gboolean visible) const { + 
ibus_engine_update_lookup_table(engine_, table.get(), visible); + } + + void update_lookup_table_fast(const g::LookupTable &table, + gboolean visible) const { + ibus_engine_update_lookup_table_fast(engine_, table.get(), visible); + } + + void show_lookup_table() const { ibus_engine_show_lookup_table(engine_); } + + void hide_lookup_table() const { ibus_engine_hide_lookup_table(engine_); } + + static void clear_lookup_table(const g::LookupTable &table) { + ibus_lookup_table_clear(table.get()); + } + + void register_properties(const g::PropList &props) const { + ibus_engine_register_properties(engine_, props.get()); + } + + void update_property(const g::Property &prop) const { + ibus_engine_update_property(engine_, prop.get()); + } + + g::Holder engine_holder_; // engine pointer + IBusEngine *engine_; + +#if IBUS_CHECK_VERSION(1, 5, 4) + IBusInputPurpose m_input_purpose_; +#endif +}; + +} // namespace ibus::slimt::t8n diff --git a/ibus-slimt-t8n/gtypes.h b/ibus-slimt-t8n/gtypes.h new file mode 100644 index 0000000..836f26a --- /dev/null +++ b/ibus-slimt-t8n/gtypes.h @@ -0,0 +1,245 @@ +#pragma once + +#include +#include +#include + +// This file includes types wrapping the GLIB objects into RAII C++ classes. +// GLIB objects follow some global reference-counting. This is an alteration +// from the original import from https://github.com/libzhuyin/ibus-libzhuyin, +// but appears to work. +// +// Not written in the best of states - expect rough edges. +namespace g { + +// RAII wrap automating some things for a GLIB pointer object +// The following concepts +// exist: +// +// 1. Increase ref-count equivalent to increasing the count of usage in +// std::shared_ptr. +// 2. Decrease ref-count equivalent to reducing count of usage in +// std::shared_ptr. When this hits 0, the allocated object is freed. +// 3. Borrowing (equivalent to .get(), when there is no alteration to +// reference-count, but a pointer is passed around). 
+// +// +// This behavior is translated into a Holder for a Raw pointer (along the same +// lines as an std::shared_ptr) as +// 1. Construction = g_object_ref_sink(...) +// 2. Destruction = g_object_unref(....) +// 3. Borrowing = .get() +// +// With the above in place, there is not much need to bother about +// reference-count updates, they happen taking advantage of C++'s RAII, similar +// to the operations of a shared_ptr. +template struct Holder { +public: + explicit Holder(Raw *p = nullptr) : pointer_(nullptr) { set(p); } + ~Holder() { set(nullptr); } + + // Assignment from raw-pointer. + Holder &operator=(Raw *p) { + set(p); + return *this; + } + + // Copy (construction + assignment) + Holder(const Holder &other) { + // Simply set this pointer, incrementing reference. + set(other.pointer_); + }; + + Holder &operator=(const Holder &other) { + if (this != &other) { + // Avoid circular references, set. + set(other.pointer_); + } + return *this; + } + + // Move (construction + assignment) + Holder(Holder &&other) noexcept { + // Set this, unset other. + set(other.pointer_); + other.set(nullptr); + } + + Holder &operator=(Holder &&other) noexcept { + if (this != &other) { + // Avoid circular messups, set this, unset other. + set(other.pointer_); + other.set(nullptr); + } + return *this; + }; + + // Consider different cases of dereferencing a Holder t + + // x = *t; const read + const Raw *operator->() const { return pointer_; } + // t->fn(...) In case t is an object with methods. + Raw *operator->() { return pointer_; } + + // *t = x; not const, write. 
+ // operator Raw *() const { return pointer_; } + Raw *get() const { return pointer_; } + +private: + Raw *pointer_ = nullptr; + + void set(Raw *other) { + if (pointer_) { + auto *g_object_pointer = reinterpret_cast(pointer_); + g_object_unref(g_object_pointer); + } + + pointer_ = other; + if (other) { + g_debug("%s, floating = %d", G_OBJECT_TYPE_NAME(other), + g_object_is_floating(other)); + g_object_ref_sink(other); + } + } +}; + +// All IBUS type wrappers inherit from Object (Holder). A CRTP is used +// to embed the Derived class information at Object for .get(). +template class Object { +public: + explicit Object(Derived *p) : pointer_(reinterpret_cast(p)) { + // g_assert(pointer_.get() != nullptr); + } + + explicit operator GObject *() const { return pointer_.get(); } + + Derived *get() const { return reinterpret_cast(pointer_.get()); } + +private: + Holder pointer_; +}; + +class Text : public Object { +public: + explicit Text(IBusText *text) : Object(text) {} + explicit Text(const gchar *str) : Object(ibus_text_new_from_string(str)) {} + + explicit Text(const std::string &str) + : Object(ibus_text_new_from_string(str.c_str())) {} + + explicit Text(gunichar ch) : Object(ibus_text_new_from_unichar(ch)) {} + + void append_attribute(guint type, guint value, guint start, guint end) { + ibus_text_append_attribute(get(), type, value, start, end); + } + + const gchar *text() const { return get()->text; } +}; + +class StaticText : public Text { +public: + explicit StaticText(const gchar *str) + : Text(ibus_text_new_from_static_string(str)) {} + + explicit StaticText(const std::string &str) + : Text(ibus_text_new_from_string(str.c_str())) {} // copy: a std::string buffer is not static storage + + explicit StaticText(gunichar ch) : Text(ch) {} +}; + +class LookupTable : public Object { +public: + explicit LookupTable(guint page_size = 10, guint cursor_pos = 0, + gboolean cursor_visible = TRUE, gboolean round = FALSE) + : Object(ibus_lookup_table_new(page_size, cursor_pos, cursor_visible, + round)) {} + + guint 
page_size() const { return ibus_lookup_table_get_page_size(get()); } + guint orientation() const { return ibus_lookup_table_get_orientation(get()); } + guint cursor_pos() const { return ibus_lookup_table_get_cursor_pos(get()); } + guint size() const { + return ibus_lookup_table_get_number_of_candidates(get()); + } + + gboolean page_up() const { return ibus_lookup_table_page_up(get()); } + gboolean page_down() const { return ibus_lookup_table_page_down(get()); } + gboolean cursor_up() const { return ibus_lookup_table_cursor_up(get()); } + gboolean cursor_down() const { return ibus_lookup_table_cursor_down(get()); } + + void set_page_size(guint size) const { + ibus_lookup_table_set_page_size(get(), size); + } + void set_cursor_pos(guint pos) const { + ibus_lookup_table_set_cursor_pos(get(), pos); + } + void set_orientation(gint orientation) const { + ibus_lookup_table_set_orientation(get(), orientation); + } + void clear() const { ibus_lookup_table_clear(get()); } + void set_cursor_visable(gboolean visable) const { + ibus_lookup_table_set_cursor_visible(get(), visable); + } + void set_label(guint index, IBusText *text) const { + ibus_lookup_table_set_label(get(), index, text); + } + void append_candidate(IBusText *text) const { + ibus_lookup_table_append_candidate(get(), text); + } + void append_label(IBusText *text) const { + ibus_lookup_table_append_label(get(), text); + } + + IBusText *get_candidate(guint index) const { + return ibus_lookup_table_get_candidate(get(), index); + } +}; + +class Property : public Object { +public: + explicit Property(const gchar *key, IBusPropType type = PROP_TYPE_NORMAL, + IBusText *label = nullptr, const gchar *icon = nullptr, + IBusText *tooltip = nullptr, gboolean sensitive = TRUE, + gboolean visible = TRUE, + IBusPropState state = PROP_STATE_UNCHECKED, + IBusPropList *props = nullptr) + : Object(ibus_property_new(key, type, label, icon, tooltip, sensitive, + visible, state, props)) {} + + void set_label(IBusText *text) { 
ibus_property_set_label(get(), text); } + + void set_label(const gchar *text) { + Text t(text); + set_label(t.get()); + } + + void set_icon(const gchar *icon) { ibus_property_set_icon(get(), icon); } + + void set_symbol(IBusText *text) { ibus_property_set_symbol(get(), text); } + + void set_symbol(const gchar *text) { + Text t(text); + set_symbol(t.get()); + } + + void set_sensitive(gboolean sensitive) { + ibus_property_set_sensitive(get(), sensitive); + } + + void set_tooltip(IBusText *text) { ibus_property_set_tooltip(get(), text); } + + void set_tooltip(const gchar *text) { + Text t(text); + set_tooltip(t.get()); + } +}; + +class PropList : public Object { +public: + PropList() : Object(ibus_prop_list_new()) {} + + void append(const Property &property) { + ibus_prop_list_append(get(), property.get()); + } +}; + +} // namespace g diff --git a/ibus-slimt-t8n/ibus_config.h.in b/ibus-slimt-t8n/ibus_config.h.in new file mode 100644 index 0000000..4763925 --- /dev/null +++ b/ibus-slimt-t8n/ibus_config.h.in @@ -0,0 +1,16 @@ +// clang-format off +#define PROJECT_SHORTNAME "@PROJECT_SHORTNAME@" +#define PROJECT_LONGNAME "@PROJECT_LONGNAME@" +#define PROJECT_DESCRIPTION "@PROJECT_DESCRIPTION@" +#define PROJECT_VERSION "@PROJECT_VERSION@" +#define PROJECT_LICENSE "@PROJECT_LICENSE@" +#define PROJECT_HOMEPAGE "@PROJECT_HOMEPAGE@" +#define AUTHOR "@AUTHOR@" + +#define IBUS_BUS_NAME "@IBUS_BUS_NAME@" +#define IBUS_ICON "@IBUS_ICON@" +#define IBUS_LAYOUT "@IBUS_LAYOUT@" +#define IBUS_LANGUAGE "@IBUS_LANGUAGE@" +#define IBUS_TEXTDOMAIN "@IBUS_TEXTDOMAIN@" +#define IBUS_COMPONENT_COMMANDLINE "@IBUS_FULL_COMMANDLINE@" +// clang-format on diff --git a/ibus-slimt-t8n/logging.h b/ibus-slimt-t8n/logging.h new file mode 100644 index 0000000..e6aaa3a --- /dev/null +++ b/ibus-slimt-t8n/logging.h @@ -0,0 +1,5 @@ +#pragma once +#include + +#define APPNAME "ibus-slimt-t8n" +#define LOG(...) 
g_log(APPNAME, G_LOG_LEVEL_MESSAGE, __VA_ARGS__) diff --git a/ibus-slimt-t8n/main.cpp b/ibus-slimt-t8n/main.cpp new file mode 100644 index 0000000..748735c --- /dev/null +++ b/ibus-slimt-t8n/main.cpp @@ -0,0 +1,33 @@ +#include "ibus-slimt-t8n/application.h" +#include "ibus-slimt-t8n/engine_compat.h" +#include + +int main(int argc, char **argv) { + /* command line options */ + gboolean ibus = FALSE; + gboolean verbose = FALSE; + + const GOptionEntry entries[] = { + {"ibus", 'i', 0, G_OPTION_ARG_NONE, &ibus, + "component is executed by ibus", nullptr}, + {"verbose", 'v', 0, G_OPTION_ARG_NONE, &verbose, "verbose", nullptr}, + {nullptr}, + }; + + GError *error = nullptr; + GOptionContext *context; + + /* Parse the command line */ + context = g_option_context_new("- ibus slimt-t8n engine component"); + g_option_context_add_main_entries(context, entries, "ibus-slimt-t8n"); + + if (!g_option_context_parse(context, &argc, &argv, &error)) { + g_print("Option parsing failed: %s\n", error->message); + g_error_free(error); + return (-1); + } + + ibus::slimt::t8n::Application application(ibus); + ibus::slimt::t8n::Application::run(); + return 0; +} diff --git a/ibus-slimt-t8n/slimt-t8n.xml.in b/ibus-slimt-t8n/slimt-t8n.xml.in new file mode 100644 index 0000000..3715880 --- /dev/null +++ b/ibus-slimt-t8n/slimt-t8n.xml.in @@ -0,0 +1,29 @@ + + + + ${IBUS_BUS_NAME} + ${PROJECT_DESCRIPTION} + ${IBUS_FULL_COMMANDLINE} + ${PROJECT_VERSION_STRING_FULL} + ${AUTHOR_XML_ESCAPED} + ${PROJECT_LICENSE} + ${PROJECT_HOMEPAGE} + ${IBUS_TEXTDOMAIN} + + + + ${PROJECT_SHORTNAME} + ${IBUS_LANGUAGE} + ${PROJECT_LICENSE} + ${AUTHOR_XML_ESCAPED} + ${IBUS_ICON} + ${IBUS_LAYOUT} + ${PROJECT_LONGNAME} + ${PROJECT_DESCRIPTION} + 99 + 🍋 + /usr/lib/ibus/ibus-setup-pinyin pinyin + + + + diff --git a/ibus-slimt-t8n/slimt_engine.cpp b/ibus-slimt-t8n/slimt_engine.cpp new file mode 100644 index 0000000..81925b2 --- /dev/null +++ b/ibus-slimt-t8n/slimt_engine.cpp @@ -0,0 +1,335 @@ +#include 
"ibus-slimt-t8n/slimt_engine.h" +#include "ibus-slimt-t8n/engine_compat.h" +#include +#include +#include +#include +#include + +namespace ibus::slimt::t8n { + +namespace { + +template T8r make() { + auto config = ibus_slimt_t8n_config(); + return T8r(config); +} + +} // namespace + +g::PropList SlimtEngine::make_children(const std::string &side, + const StringSet &languages, + const std::string &default_language) { + bool first = false; + g::PropList properties; + for (const auto &lang : languages) { + std::string key = side + "_" + lang; // NOLINT + g::Text label(lang); + IBusPropState state = + (lang == default_language) ? PROP_STATE_CHECKED : PROP_STATE_UNCHECKED; + + IBusPropList *children = nullptr; + const gchar *icon = nullptr; + gboolean sensitive = TRUE; + gboolean visible = TRUE; + + g::Property property( // + key.c_str(), PROP_TYPE_RADIO, label.get(), icon, label.get(), sensitive, + visible, state, + children // + ); + + properties.append(property); + } + + return properties; +} + +SlimtEngine::Select SlimtEngine::make_select(const std::string &key, // + const std::string &tooltip, // + const StringSet &languages, // + const std::string &value // +) { + const gchar *gkey = key.c_str(); + const gchar *icon = nullptr; + g::Text glabel(key); + g::Text gtooltip(tooltip); + g::PropList gchildren = make_children(key, languages, value); + gboolean sensitive = TRUE; + gboolean visible = TRUE; + + g::Property node(gkey, PROP_TYPE_MENU, glabel.get(), icon, gtooltip.get(), + sensitive, visible, PROP_STATE_CHECKED, gchildren.get()); + + Select select{ + .node = std::move(node), // + .options = std::move(gchildren) // + }; + + return select; +} + +g::Property SlimtEngine::make_verify(bool enable_sensitive) { // + const gchar *icon = nullptr; + g::Text glabel("verify"); + g::Text gtooltip("Verify with backtranslated text as second candidate."); + auto sensitive = static_cast(enable_sensitive); + gboolean visible = TRUE; + IBusPropList *children = nullptr; + 
g::Property verify("verify", PROP_TYPE_TOGGLE, glabel.get(), icon, + gtooltip.get(), sensitive, visible, PROP_STATE_UNCHECKED, + children); + return verify; +} + +SlimtEngine::UI SlimtEngine::make_ui(Translator &translator) { + + Direction direction = translator.default_direction(); + translator.set_direction(direction); + + Select source = make_select( // + "source", "Source language", // + translator.languages().source, // + direction.source); + Select target = make_select( // + "target", "Target language", // + translator.languages().target, // + direction.target); + + bool enable_sensitive = true; + auto verify = make_verify(enable_sensitive); + + // Assign UI. + return { + .source = std::move(source), // + .target = std::move(target), // + .verify = std::move(verify), // + }; +} + +/* constructor */ +SlimtEngine::SlimtEngine(IBusEngine *engine) + : Engine(engine), translator_(make()), + ui_(make_ui(translator_)) { + LOG("slimt-t8n engine started"); +} + +/* destructor */ +SlimtEngine::~SlimtEngine() { hide_lookup_table(); } + +gboolean SlimtEngine::process_key_event(guint keyval, guint /*keycode*/, + guint modifiers) { + // If both langs are set to equal, translation mechanism needn't kick in. + if (translator_.direction().source == translator_.direction().target) { + return 0; + } + + if (content_is_password()) + return FALSE; + + if (modifiers & IBUS_RELEASE_MASK) { + return FALSE; + } + + // We are skipping any modifiers. Our workflow is simple. Ctrl-Enter key is + // send. 
+ if (modifiers & IBUS_CONTROL_MASK && keyval == IBUS_Return) { + g::Text text(buffer_.target); + commit_text(text); + buffer_.source.clear(); + buffer_.target.clear(); + hide_lookup_table(); + return TRUE; + } + + // If ctrl modifier or something is else, we let it pass + if (modifiers & IBUS_CONTROL_MASK) { + return FALSE; + } + + gboolean retval = FALSE; + switch (keyval) { + case IBUS_space: { + if (buffer_.source.empty()) { + update_buffer(" "); + commit(); + } else if (buffer_.source.back() == ' ') { + commit(); + } else { + update_buffer(" "); + retval = TRUE; + } + + } break; + case IBUS_Return: { + if (buffer_.target.empty()) { + // We have no use for empty enters. + return 0; + } + buffer_.target += "\n"; + commit(); + retval = TRUE; + + } break; + case IBUS_BackSpace: { + if (buffer_.source.empty()) { + // Let the backspace through. + retval = FALSE; + } else { + buffer_.source.pop_back(); + refresh_translation(); + retval = TRUE; + } + } break; + case IBUS_Left: + case IBUS_Right: + case IBUS_Up: + case IBUS_Down: + return FALSE; + break; + + default: { + if (isprint(static_cast(keyval))) { + std::string append; + append += static_cast(keyval); + update_buffer(append); + retval = TRUE; + } else { + retval = FALSE; + } + } break; + } + return retval; +} + +void SlimtEngine::update_buffer(const std::string &append) { + buffer_.source += append; + refresh_translation(); +} + +void SlimtEngine::refresh_translation() { + if (!buffer_.source.empty()) { + std::string translation = translator_.translate(buffer_.source); + buffer_.target = translation; + std::vector entries = {buffer_.source}; + if (translator_.verify()) { + std::string backtranslation = translator_.backtranslate(translation); + entries.push_back(backtranslation); + } + g::LookupTable table = generate_lookup_table(entries); + update_lookup_table(table, + /*visible=*/static_cast(!entries.empty())); + + cursor_position_ = buffer_.target.size(); + g::Text pre_edit(buffer_.target); + 
update_preedit_text(pre_edit, cursor_position_, /*visible=*/TRUE); + show_lookup_table(); + } else { + // Buffer is already clear (empty). + // We will manually clear the buffer_.target. + buffer_.target.clear(); + + cursor_position_ = buffer_.target.size(); + g::Text pre_edit(buffer_.target); + update_preedit_text(pre_edit, cursor_position_, /*visible=*/FALSE); + hide_preedit_text(); + + hide_lookup_table(); + } +} + +void SlimtEngine::commit() { + g::Text text(buffer_.target); + commit_text(text); + hide_lookup_table(); + + buffer_.source.clear(); + buffer_.target.clear(); + + hide_lookup_table(); + cursor_position_ = 0; + g::Text pre_edit(""); + update_preedit_text(pre_edit, cursor_position_, TRUE); +} + +void SlimtEngine::focus_in() { + g::PropList properties; + properties.append(ui_.source.node); + properties.append(ui_.target.node); + properties.append(ui_.verify); + register_properties(properties); +} + +void SlimtEngine::focus_out() { + buffer_.source.clear(); + buffer_.target.clear(); + Engine::focus_out(); +} + +void SlimtEngine::reset() {} + +void SlimtEngine::enable() {} + +void SlimtEngine::disable() {} + +void SlimtEngine::page_up() {} + +void SlimtEngine::page_down() {} + +void SlimtEngine::cursor_up() {} + +void SlimtEngine::cursor_down() {} + +inline void SlimtEngine::show_setup_dialog() { + // g_spawn_command_line_async(LIBEXECDIR "/ibus-setup-libzhuyin zhuyin", + // NULL); +} + +gboolean SlimtEngine::property_activate(const char *cprop_name, + guint prop_state) { + std::string prop_name(cprop_name); + Direction direction = translator_.direction(); + if (prop_name == "verify") { + LOG("Verify translation is %d -> %d", translator_.verify(), prop_state); + bool verify = (prop_state != 0U); + LOG("Enabling backtranslation %s -> %s", direction.target.c_str(), + direction.source.c_str()); + if (translator_.verifiable()) { + translator_.set_verify(verify); + } + } else { + const std::string &serialized(prop_name); + constexpr size_t kPrefixLength = 6; + 
constexpr size_t kSeparatorLength = 1; + std::string side = serialized.substr(0, kPrefixLength); + std::string lang = + serialized.substr(kPrefixLength + kSeparatorLength, serialized.size()); + if (prop_state == 1) { + LOG("%s [%s] [%s]", prop_name.c_str(), side.c_str(), lang.c_str()); + if (side == "source") { + direction.source = lang; + } else { + direction.target = lang; + } + translator_.set_direction(direction); + + ui_.verify = make_verify(translator_.verifiable()); + update_property(ui_.verify); + } + } + return FALSE; +} + +void SlimtEngine::candidate_clicked(guint index, guint button, guint state) {} + +g::LookupTable +SlimtEngine::generate_lookup_table(const std::vector &entries) { + g::LookupTable lookup_table; + for (const auto &entry : entries) { + g::Text text(entry); + lookup_table.append_candidate(text.get()); + } + return lookup_table; +} + +} // namespace ibus::slimt::t8n diff --git a/ibus-slimt-t8n/slimt_engine.h b/ibus-slimt-t8n/slimt_engine.h new file mode 100644 index 0000000..07cd887 --- /dev/null +++ b/ibus-slimt-t8n/slimt_engine.h @@ -0,0 +1,77 @@ +#pragma once + +#include "ibus-slimt-t8n/engine_compat.h" +#include "ibus-slimt-t8n/translator.h" +#include +#include +#include + +namespace ibus::slimt::t8n { + +/// Idea here is to maintain an active buffer string. +// +// 1. The first suggestion is the translated text. +// 2. The second suggestion is the raw text the user entered. 
+class SlimtEngine : public Engine { +public: + explicit SlimtEngine(IBusEngine *engine); + ~SlimtEngine() override; + + // virtual functions + gboolean process_key_event(guint keyval, guint keycode, + guint modifiers) override; + void focus_in() override; + void focus_out() override; + void reset() override; + void enable() override; + void disable() override; + void page_up() override; + void page_down() override; + void cursor_up() override; + void cursor_down() override; + gboolean property_activate(const gchar *prop_name, guint prop_state) override; + void candidate_clicked(guint index, guint button, guint state) override; + +private: + void show_setup_dialog(); + + static g::LookupTable + generate_lookup_table(const std::vector &entries); + + void update_buffer(const std::string &append); + void refresh_translation(); + void commit(); + + Pair buffer_; + gint cursor_position_; + + Translator translator_; + Direction direction_; + + struct Select { + g::Property node; + g::PropList options; + }; + + struct UI { + Select source; + Select target; + g::Property verify; + }; + + UI ui_; + + static UI make_ui(Translator &translator); + static g::PropList make_children(const std::string &side, + const StringSet &languages, + const std::string &default_language); + + static Select make_select(const std::string &key, // + const std::string &tooltip, // + const StringSet &languages, // + const std::string &value); + + static g::Property make_verify(bool enable_sensitive); +}; + +} // namespace ibus::slimt::t8n diff --git a/ibus-slimt-t8n/test.cpp b/ibus-slimt-t8n/test.cpp new file mode 100644 index 0000000..f38424d --- /dev/null +++ b/ibus-slimt-t8n/test.cpp @@ -0,0 +1,44 @@ +#include "ibus-slimt-t8n/logging.h" +#include "ibus-slimt-t8n/translator.h" +#include +#include + +template void repl(const std::string &config) { + std::cout << "Type in: " + << "\n"; + std::cout << " " + << " \n"; + + std::string input; + using Direction = ibus::slimt::t8n::Direction; + 
Direction old; + Direction current; + Translator translator(config); + + while (!std::cin.eof()) { + std::cout << " $ "; + std::cin >> current.source; + std::cin >> current.target; + std::getline(std::cin, input); + if (current.source != old.source || current.target != old.target) { + translator.set_direction(current); + old = current; + } + auto translation = translator.translate(input); + std::cout << translation << "\n"; + LOG("Direction %s -> %s: %s / %s", current.source.c_str(), + current.target.c_str(), input.c_str(), translation.c_str()); + } +} + +int main(int argc, char **argv) { + std::string mode((argc == 2) ? argv[1] : ""); + auto config = ibus::slimt::t8n::ibus_slimt_t8n_config(); + if (mode == "fake") { + repl(config); + } else { + repl(config); + } + + return 0; +} diff --git a/ibus-slimt-t8n/translator.cpp b/ibus-slimt-t8n/translator.cpp new file mode 100644 index 0000000..2280a26 --- /dev/null +++ b/ibus-slimt-t8n/translator.cpp @@ -0,0 +1,319 @@ +#include "ibus-slimt-t8n/translator.h" +#include +#include +#include + +#include "yaml-cpp/yaml.h" +#include + +namespace ibus::slimt::t8n { + +Direction reverse(const Direction &direction) { + return { + .source = direction.target, // + .target = direction.source // + }; +} + +Inventory::Inventory(const std::string &config_path) { + inventory_ = load(config_path); + using Strings = std::vector; + auto select_languages = inventory_["languages"].as(); + select_languages_.insert(select_languages.begin(), select_languages.end()); + + YAML::Node models = inventory_["models"]; + for (const YAML::Node &model : models) { + // std::string type = entry["type"].GetString(); + YAML::Node node = model["direction"]; + + Direction direction{ + .source = node["source"].as(), // + .target = node["target"].as() // + }; + + auto preferred = [&, this](const std::string &lang) { + return select_languages_.find(lang) != select_languages_.end(); + }; + + if (preferred(direction.source) and preferred(direction.target)) { + 
languages_.source.insert(direction.source); + languages_.target.insert(direction.target); + } + + directions_[direction] = model; + } + + default_direction_ = { + .source = inventory_["default"]["source"].as(), // + .target = inventory_["default"]["target"].as() // + }; + + verify_ = inventory_["verify"].as(); +} + +std::shared_ptr make_model(const YAML::Node &config) { + auto root = config["root"].as(); + auto prefix_root = [&root](const std::string &path) { + return root + "/" + path; + }; + + Package path{ + .model = prefix_root(config["model"].as()), // + .vocabulary = + prefix_root(config["vocabs"]["source"].as()), // + .shortlist = prefix_root(config["shortlist"].as()) // + }; + + LOG("model_path: %s", path.model.c_str()); + Model::Config arch = ::slimt::preset::tiny(); + return std::make_shared(arch, path); +} + +std::shared_ptr Inventory::query(const Direction &direction) const { + auto query = directions_.find(direction); + if (query != directions_.end()) { + return make_model(query->second); + } + return nullptr; +} + +const Languages &Inventory::languages() const { return languages_; } + +bool Inventory::exists(const Direction &direction) const { + auto query = directions_.find(direction); + return query != directions_.end(); +} + +const Direction &Inventory::default_direction() const { + return default_direction_; +} + +bool Inventory::Equal::operator()(const Direction &lhs, + const Direction &rhs) const { + return lhs.source == rhs.source && lhs.target == rhs.target; +} + +size_t Inventory::Hash::operator()(const Direction &direction) const { + auto hash_combine = [](size_t &seed, size_t next) { + seed ^= (std::hash{}(next) // + + 0x9e3779b9 // NOLINT + + (seed << 6) // NOLINT + + (seed >> 2) // NOLINT + ); + }; + + size_t seed = std::hash{}(direction.source); + hash_combine(seed, std::hash{}(direction.target)); + return seed; +} + +YAML::Node Inventory::load(const std::string &path) { + YAML::Node tree = YAML::LoadFile(path); + return tree; +} + +void 
Translator::load_model(const Direction &direction, + Translator::Chain &chain) { + if (direction.source == "English" or direction.target == "English") { + std::shared_ptr model = inventory_.query(direction); + if (model) { + chain.first = model; + LOG("Found model for (%s -> %s)", direction.source.c_str(), + direction.target.c_str()); + } else { + LOG("No model found for %s -> %s", direction.source.c_str(), + direction.target.c_str()); + } + } else { + // Try to translate by pivoting. + Direction to_en{ + .source = direction.source, // + .target = "English" // + }; + + Direction from_en{ + .source = "English", // + .target = direction.target // + }; + + std::shared_ptr first = inventory_.query(to_en); + std::shared_ptr second = inventory_.query(from_en); + + if (first && second) { + chain.first = first; + chain.second = second; + LOG("Found model for (%s -> [en] -> %s)", direction.source.c_str(), + direction.target.c_str()); + } else { + LOG("Unable to generate model (%d) %s -> [en] -> %s %d ", + first == nullptr, direction.source.c_str(), direction.target.c_str(), + second == nullptr); + } + } +} + +void Translator::set_direction(const Direction &direction) { + direction_ = direction; + load_model(direction, forward_); +} + +void Translator::set_verify(bool verify) { + verify_ = verify; + Direction back = reverse(direction_); + load_model(back, backward_); +} + +bool Translator::verifiable() const { + Direction back = reverse(direction_); + if (back.source == "English" or back.target == "English") { + return inventory_.exists(back); + } else { // NOLINT + // Try to translate by pivoting. 
+ Direction to_en{ + .source = back.source, // + .target = "English" // + }; + + Direction from_en{ + .source = "English", // + .target = back.target // + }; + + return inventory_.exists(to_en) and inventory_.exists(from_en); + } +} + +std::string Translator::translate(const std::string &source) { + Options options{.html = false}; + + if (forward_.first && forward_.second) { + // Pivoting. + Handle handle = + service_.pivot(forward_.first, forward_.second, source, options); + Response response = handle.future().get(); + return response.target.text; + } + + assert(forward_.first != nullptr); + + Handle handle = service_.translate(forward_.first, source, options); + Response response = handle.future().get(); + return response.target.text; +} + +std::string Translator::backtranslate(const std::string &source) { + Options options{.html = false}; + if (backward_.first && backward_.second) { + // Pivoting. + Handle handle = + service_.pivot(backward_.first, backward_.second, source, options); + Response response = handle.future().get(); + return response.target.text; + } + + assert(backward_.first != nullptr); + + Handle handle = service_.translate(backward_.first, source, options); + Response response = handle.future().get(); + return response.target.text; +} + +const Languages &Translator::languages() const { + return inventory_.languages(); +} + +const Direction &Translator::default_direction() const { + return inventory_.default_direction(); +} + +void FakeTranslator::set_direction(const Direction &direction) { + direction_ = direction; +} + +void FakeTranslator::set_verify(bool verify) { verify_ = verify; } + +std::string FakeTranslator::translate(std::string input) { // NOLINT + + std::string response; + if (input.empty()) { + return response; + } + + // For a given length, generates a 6 length set of tokens. + // Entire string is changed by seeding with length each time. + // Simulates translation in some capacity. 
+ auto transform = [](size_t length) -> std::string { + std::mt19937_64 generator; + constexpr size_t kTruncateLength = 6; + generator.seed(length); + std::string target; + for (size_t i = 0; i < length; i++) { + if (i != 0) { + target += " "; + } + size_t value = generator(); + constexpr size_t kMaxLength = 20; + std::string hex(kMaxLength, ' '); + std::sprintf(hex.data(), "%x", static_cast(value)); + // 2 to truncate 0x. + target += hex.substr(2, kTruncateLength); + } + return target; + }; + + auto token_count = [](const std::string &input) -> size_t { + std::string token; + size_t count = 0; + for (char c : input) { + if (isspace(c)) { + // Check for space. + if (!token.empty()) { + // Start of a new word. + ++count; + token = ""; + } + } else { + token += std::string(1, c); + } + } + // Non space-detected overhang. + if (!token.empty()) { + count += 1; + } + + return count; + }; + + size_t count = token_count(input); + std::string target = transform(count); + return target; +} + +std::string FakeTranslator::backtranslate(std::string input) { + return translate(std::move(input)); +} + +const Languages &FakeTranslator::languages() const { return languages_; } + +const Direction &FakeTranslator::default_direction() const { + return direction_; +} + +std::string ibus_slimt_t8n_config() { + namespace fs = std::filesystem; + fs::path home = std::getenv("HOME"); + + // Setup logging + // fs::path log_path = home / ".local" / "var" / "ibus-slimt-t8n.log"; + // setup_logging(log_path.string()); + // g_log("ibus-slimt-t8n", // + // G_LOG_LEVEL_MESSAGE, // + // "Creating log at: %s", log_path.string().c_str()); + + // Pickup config-defaults. 
+ fs::path config = home / ".config"; + auto path = (config / "ibus-slimt-t8n.yml").string(); + return path; +} + +} // namespace ibus::slimt::t8n diff --git a/ibus-slimt-t8n/translator.h b/ibus-slimt-t8n/translator.h new file mode 100644 index 0000000..b8f49e9 --- /dev/null +++ b/ibus-slimt-t8n/translator.h @@ -0,0 +1,126 @@ +#pragma once +#include "ibus-slimt-t8n/logging.h" +#include "slimt/slimt.hh" +#include "yaml-cpp/yaml.h" +#include +#include +#include + +namespace ibus::slimt::t8n { + +template struct Pair { + Field source; + Field target; +}; + +using Direction = Pair; +using Strings = std::vector; +using StringSet = std::set; +using Languages = Pair; + +template using Package = ::slimt::Package; +using Config = ::slimt::Config; +using Model = ::slimt::Model; +using Async = ::slimt::Async; +using Options = ::slimt::Options; +using Handle = ::slimt::Handle; +using Response = ::slimt::Response; + +Direction reverse(const Direction &direction); + +class Inventory { +public: + explicit Inventory(const std::string &config_path); + std::shared_ptr query(const Direction &direction) const; + const Languages &languages() const; + bool verify() const { return verify_; } + bool exists(const Direction &direction) const; + const Direction &default_direction() const; + +private: + struct Hash { + size_t operator()(const Direction &direction) const; + }; + + struct Equal { + bool operator()(const Direction &lhs, const Direction &rhs) const; + }; + + std::unordered_map directions_; + std::set select_languages_; + Languages languages_; + Direction default_direction_; + YAML::Node inventory_; + bool verify_; + static YAML::Node load(const std::string &path); +}; + +class Translator { +public: + explicit Translator(const std::string &ibus_config_path) + : service_(Config{}), inventory_(ibus_config_path), + verify_(inventory_.verify()) {} + + void set_direction(const Direction &direction); + void set_verify(bool verify); + bool verify() const { return verify_; } + bool 
verifiable() const; + const Direction &direction() const { return direction_; } + + std::string translate(const std::string &source); + std::string backtranslate(const std::string &source); + + const Direction &default_direction() const; + const Languages &languages() const; + +private: + using ModelPtr = std::shared_ptr; + using Chain = std::pair; + + void load_model(const Direction &direction, Chain &chain); + + Inventory inventory_; + Direction direction_; + + Async service_; + + Chain forward_; + Chain backward_; + + bool verify_; +}; + +class FakeTranslator { +public: + explicit FakeTranslator(const std::string &){}; + + void set_direction(const Direction &direction); + void set_verify(bool verify); + + bool verify() const { return verify_; } + const Direction &direction() const { return direction_; } + + std::string translate(std::string input); + std::string backtranslate(std::string input); + + const Direction &default_direction() const; + const Languages &languages() const; + +private: + Languages languages_ = { + {"English", "German", "French"}, // + {"English", "German", "French"} // + }; + + Direction direction_{ + .source = "English", // + .target = "German" // + }; + + bool verify_ = false; +}; + +void make_translator(); +std::string ibus_slimt_t8n_config(); + +} // namespace ibus::slimt::t8n diff --git a/run-clang-format.py b/run-clang-format.py new file mode 100644 index 0000000..dcabaf1 --- /dev/null +++ b/run-clang-format.py @@ -0,0 +1,408 @@ +#!/usr/bin/env python +"""A wrapper script around clang-format, suitable for linting multiple files +and to use for continuous integration. + +This is an alternative API for the clang-format command line. +It runs over multiple files and directories in parallel. +A diff output is produced and a sensible exit code is returned. 
+ +""" + +from __future__ import print_function, unicode_literals + +import argparse +import codecs +import difflib +import fnmatch +import io +import errno +import multiprocessing +import os +import signal +import subprocess +import sys +import traceback + +from functools import partial + +try: + from subprocess import DEVNULL # py3k +except ImportError: + DEVNULL = open(os.devnull, "wb") + + +DEFAULT_EXTENSIONS = 'c,h,C,H,cpp,hpp,cc,hh,c++,h++,cxx,hxx' +DEFAULT_CLANG_FORMAT_IGNORE = '.clang-format-ignore' + + +class ExitStatus: + SUCCESS = 0 + DIFF = 1 + TROUBLE = 2 + +def excludes_from_file(ignore_file): + excludes = [] + try: + with io.open(ignore_file, 'r', encoding='utf-8') as f: + for line in f: + if line.startswith('#'): + # ignore comments + continue + pattern = line.rstrip() + if not pattern: + # allow empty lines + continue + excludes.append(pattern) + except EnvironmentError as e: + if e.errno != errno.ENOENT: + raise + return excludes; + +def list_files(files, recursive=False, extensions=None, exclude=None): + if extensions is None: + extensions = [] + if exclude is None: + exclude = [] + + out = [] + for file in files: + if recursive and os.path.isdir(file): + for dirpath, dnames, fnames in os.walk(file): + fpaths = [os.path.join(dirpath, fname) for fname in fnames] + for pattern in exclude: + # os.walk() supports trimming down the dnames list + # by modifying it in-place, + # to avoid unnecessary directory listings. 
+ dnames[:] = [ + x for x in dnames + if + not fnmatch.fnmatch(os.path.join(dirpath, x), pattern) + ] + fpaths = [ + x for x in fpaths if not fnmatch.fnmatch(x, pattern) + ] + for f in fpaths: + ext = os.path.splitext(f)[1][1:] + if ext in extensions: + out.append(f) + else: + out.append(file) + return out + + +def make_diff(file, original, reformatted): + return list( + difflib.unified_diff( + original, + reformatted, + fromfile='{}\t(original)'.format(file), + tofile='{}\t(reformatted)'.format(file), + n=3)) + + +class DiffError(Exception): + def __init__(self, message, errs=None): + super(DiffError, self).__init__(message) + self.errs = errs or [] + + +class UnexpectedError(Exception): + def __init__(self, message, exc=None): + super(UnexpectedError, self).__init__(message) + self.formatted_traceback = traceback.format_exc() + self.exc = exc + + +def run_clang_format_diff_wrapper(args, file): + try: + ret = run_clang_format_diff(args, file) + return ret + except DiffError: + raise + except Exception as e: + raise UnexpectedError('{}: {}: {}'.format(file, e.__class__.__name__, + e), e) + + +def run_clang_format_diff(args, file): + try: + with io.open(file, 'r', encoding='utf-8') as f: + original = f.readlines() + except IOError as exc: + raise DiffError(str(exc)) + + if args.in_place: + invocation = [args.clang_format_executable, '-i', file] + else: + invocation = [args.clang_format_executable, file] + + if args.style: + invocation.extend(['--style', args.style]) + + if args.dry_run: + print(" ".join(invocation)) + return [], [] + + # Use of utf-8 to decode the process output. + # + # Hopefully, this is the correct thing to do. + # + # It's done due to the following assumptions (which may be incorrect): + # - clang-format will returns the bytes read from the files as-is, + # without conversion, and it is already assumed that the files use utf-8. 
+ # - if the diagnostics were internationalized, they would use utf-8: + # > Adding Translations to Clang + # > + # > Not possible yet! + # > Diagnostic strings should be written in UTF-8, + # > the client can translate to the relevant code page if needed. + # > Each translation completely replaces the format string + # > for the diagnostic. + # > -- http://clang.llvm.org/docs/InternalsManual.html#internals-diag-translation + # + # It's not pretty, due to Python 2 & 3 compatibility. + encoding_py3 = {} + if sys.version_info[0] >= 3: + encoding_py3['encoding'] = 'utf-8' + + try: + proc = subprocess.Popen( + invocation, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + universal_newlines=True, + **encoding_py3) + except OSError as exc: + raise DiffError( + "Command '{}' failed to start: {}".format( + subprocess.list2cmdline(invocation), exc + ) + ) + proc_stdout = proc.stdout + proc_stderr = proc.stderr + if sys.version_info[0] < 3: + # make the pipes compatible with Python 3, + # reading lines should output unicode + encoding = 'utf-8' + proc_stdout = codecs.getreader(encoding)(proc_stdout) + proc_stderr = codecs.getreader(encoding)(proc_stderr) + # hopefully the stderr pipe won't get full and block the process + outs = list(proc_stdout.readlines()) + errs = list(proc_stderr.readlines()) + proc.wait() + if proc.returncode: + raise DiffError( + "Command '{}' returned non-zero exit status {}".format( + subprocess.list2cmdline(invocation), proc.returncode + ), + errs, + ) + if args.in_place: + return [], errs + return make_diff(file, original, outs), errs + + +def bold_red(s): + return '\x1b[1m\x1b[31m' + s + '\x1b[0m' + + +def colorize(diff_lines): + def bold(s): + return '\x1b[1m' + s + '\x1b[0m' + + def cyan(s): + return '\x1b[36m' + s + '\x1b[0m' + + def green(s): + return '\x1b[32m' + s + '\x1b[0m' + + def red(s): + return '\x1b[31m' + s + '\x1b[0m' + + for line in diff_lines: + if line[:4] in ['--- ', '+++ ']: + yield bold(line) + elif line.startswith('@@ '): 
+ yield cyan(line) + elif line.startswith('+'): + yield green(line) + elif line.startswith('-'): + yield red(line) + else: + yield line + + +def print_diff(diff_lines, use_color): + if use_color: + diff_lines = colorize(diff_lines) + if sys.version_info[0] < 3: + sys.stdout.writelines((l.encode('utf-8') for l in diff_lines)) + else: + sys.stdout.writelines(diff_lines) + + +def print_trouble(prog, message, use_colors): + error_text = 'error:' + if use_colors: + error_text = bold_red(error_text) + print("{}: {} {}".format(prog, error_text, message), file=sys.stderr) + + +def main(): + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + '--clang-format-executable', + metavar='EXECUTABLE', + help='path to the clang-format executable', + default='clang-format') + parser.add_argument( + '--extensions', + help='comma separated list of file extensions (default: {})'.format( + DEFAULT_EXTENSIONS), + default=DEFAULT_EXTENSIONS) + parser.add_argument( + '-r', + '--recursive', + action='store_true', + help='run recursively over directories') + parser.add_argument( + '-d', + '--dry-run', + action='store_true', + help='just print the list of files') + parser.add_argument( + '-i', + '--in-place', + action='store_true', + help='format file instead of printing differences') + parser.add_argument('files', metavar='file', nargs='+') + parser.add_argument( + '-q', + '--quiet', + action='store_true', + help="disable output, useful for the exit code") + parser.add_argument( + '-j', + metavar='N', + type=int, + default=0, + help='run N clang-format jobs in parallel' + ' (default number of cpus + 1)') + parser.add_argument( + '--color', + default='auto', + choices=['auto', 'always', 'never'], + help='show colored diff (default: auto)') + parser.add_argument( + '-e', + '--exclude', + metavar='PATTERN', + action='append', + default=[], + help='exclude paths matching the given glob-like pattern(s)' + ' from recursive search') + parser.add_argument( + '--style', + 
help='formatting style to apply (LLVM, Google, Chromium, Mozilla, WebKit)') + + args = parser.parse_args() + + # use default signal handling, like diff return SIGINT value on ^C + # https://bugs.python.org/issue14229#msg156446 + signal.signal(signal.SIGINT, signal.SIG_DFL) + try: + signal.SIGPIPE + except AttributeError: + # compatibility, SIGPIPE does not exist on Windows + pass + else: + signal.signal(signal.SIGPIPE, signal.SIG_DFL) + + colored_stdout = False + colored_stderr = False + if args.color == 'always': + colored_stdout = True + colored_stderr = True + elif args.color == 'auto': + colored_stdout = sys.stdout.isatty() + colored_stderr = sys.stderr.isatty() + + version_invocation = [args.clang_format_executable, str("--version")] + try: + subprocess.check_call(version_invocation, stdout=DEVNULL) + except subprocess.CalledProcessError as e: + print_trouble(parser.prog, str(e), use_colors=colored_stderr) + return ExitStatus.TROUBLE + except OSError as e: + print_trouble( + parser.prog, + "Command '{}' failed to start: {}".format( + subprocess.list2cmdline(version_invocation), e + ), + use_colors=colored_stderr, + ) + return ExitStatus.TROUBLE + + retcode = ExitStatus.SUCCESS + + excludes = excludes_from_file(DEFAULT_CLANG_FORMAT_IGNORE) + excludes.extend(args.exclude) + + files = list_files( + args.files, + recursive=args.recursive, + exclude=excludes, + extensions=args.extensions.split(',')) + + if not files: + return + + njobs = args.j + if njobs == 0: + njobs = multiprocessing.cpu_count() + 1 + njobs = min(len(files), njobs) + + if njobs == 1: + # execute directly instead of in a pool, + # less overhead, simpler stacktraces + it = (run_clang_format_diff_wrapper(args, file) for file in files) + pool = None + else: + pool = multiprocessing.Pool(njobs) + it = pool.imap_unordered( + partial(run_clang_format_diff_wrapper, args), files) + pool.close() + while True: + try: + outs, errs = next(it) + except StopIteration: + break + except DiffError as e: + 
print_trouble(parser.prog, str(e), use_colors=colored_stderr) + retcode = ExitStatus.TROUBLE + sys.stderr.writelines(e.errs) + except UnexpectedError as e: + print_trouble(parser.prog, str(e), use_colors=colored_stderr) + sys.stderr.write(e.formatted_traceback) + retcode = ExitStatus.TROUBLE + # stop at the first unexpected error, + # something could be very wrong, + # don't process all files unnecessarily + if pool: + pool.terminate() + break + else: + sys.stderr.writelines(errs) + if outs == []: + continue + if not args.quiet: + print_diff(outs, use_color=colored_stdout) + if retcode == ExitStatus.SUCCESS: + retcode = ExitStatus.DIFF + if pool: + pool.join() + return retcode + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/scripts/ci/e2e.sh b/scripts/ci/e2e.sh new file mode 100644 index 0000000..529eeb8 --- /dev/null +++ b/scripts/ci/e2e.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +set -eo pipefail + +cmake -B build -S . -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DCMAKE_BUILD_TYPE=Debug +cmake --build build --target all # noroot +sudo cmake --build build --target install diff --git a/scripts/ci/format-check.sh b/scripts/ci/format-check.sh new file mode 100644 index 0000000..bd3152e --- /dev/null +++ b/scripts/ci/format-check.sh @@ -0,0 +1,60 @@ +#!/bin/bash + +set -eo pipefail +set -x + +VCS=fossil + +function formatting-check-clang-format { + # clang-format + python3 run-clang-format.py --style file -r ibus-slimt-t8n +} + +function formatting-check-clang-tidy { + # clang-tidy + mkdir -p build + ARGS=( + -DCMAKE_EXPORT_COMPILE_COMMANDS=on + -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ + ) + + cmake -B build -S . "${ARGS[@]}" + set +e + FILES=$(find app ibus-slimt-t8n -type f) + run-clang-tidy -export-fixes build/clang-tidy.ibus-slimt-t8n.yml -fix -format -p build -header-filter="$PWD/ibus-slimt-t8n" ${FILES[@]} + CHECK_STATUS=$? 
class IBusSlimtT8nConfig:
    """Accumulates the settings written to the ibus-slimt-t8n YAML config.

    The object is populated through ``add_model``, ``set_default`` and
    ``set_verify`` and serialized with ``export``.
    """

    def __init__(self):
        self.models = []
        self.languages = set()
        # Created up front so the object is always fully formed, even when a
        # setter has not been called yet.
        self.default = None
        self.verify = False

    def add_model(self, model):
        """Register *model* and record both languages of its direction.

        *model* must contain a ``"direction"`` mapping with ``"source"`` and
        ``"target"`` keys (the shape returned by ``retrieve``).
        """
        self.models.append(model)
        direction = model["direction"]
        self.languages.update((direction["source"], direction["target"]))

    def set_default(self, model_info):
        """Use the ``src``/``trg`` pair of *model_info* as the default direction."""
        self.default = {"source": model_info["src"], "target": model_info["trg"]}

    def set_verify(self, verify):
        """Store the ``verify`` flag that is written to the config."""
        self.verify = verify

    def export(self, path):
        """Write the collected configuration to *path* as YAML.

        Raises:
            ValueError: if ``set_default`` has not been called. The check runs
                before *path* is opened, so no partial file is left behind.
        """
        if self.default is None:
            raise ValueError(
                "no default direction configured; call set_default() before export()"
            )

        payload = {
            "models": self.models,
            # Sorted so the generated file is stable between runs; a set has
            # no defined iteration order.
            "languages": sorted(self.languages),
            "default": self.default,
            "verify": self.verify,
        }

        with open(path, "w") as fp:
            yaml.dump(payload, fp)


def retrieve(model_info, config_path):
    """Build the config entry for a single model.

    Reads the model's YAML config at *config_path* and combines it with the
    ``code``/``src``/``trg`` fields of *model_info*.

    Returns:
        A dict with the keys ``name``, ``direction``, ``root`` (directory of
        *config_path*), ``model`` (first entry of ``models``), ``vocabs``
        (first and last entry of ``vocabs``; these coincide when only one
        vocabulary is listed) and ``shortlist`` (first entry, or ``None`` when
        the config has no non-empty ``shortlist``).
    """
    with open(config_path, encoding="utf-8") as config_file:
        data = yaml.safe_load(config_file)

    shortlist = data.get("shortlist")
    vocabs = data["vocabs"]
    return {
        "name": model_info["code"],
        "direction": {"source": model_info["src"], "target": model_info["trg"]},
        "root": os.path.dirname(config_path),
        "model": data["models"][0],
        "vocabs": {"source": vocabs[0], "target": vocabs[-1]},
        "shortlist": shortlist[0] if shortlist else None,
    }


if __name__ == "__main__":
    parser = ArgumentParser()
    repositories = REPOSITORY.available()
    parser.add_argument(
        "-r",
        "--repositories",
        nargs="+",
        default=repositories,
        choices=REPOSITORY.available(),
    )

    parser.add_argument("--default", type=str, required=True)
    parser.add_argument("--verify", action="store_true")

    args = parser.parse_args()

    # Validate --default early so a malformed value yields a usage error
    # instead of an unpacking traceback after all models were scanned.
    default_parts = args.default.split("/")
    if len(default_parts) != 2 or not all(default_parts):
        parser.error("--default must have the form <repository>/<model>")
    default_repository, default_model = default_parts

    config = IBusSlimtT8nConfig()

    for repository in args.repositories:
        models = REPOSITORY.models(repository, filter_downloaded=True)
        for model in models:
            config_path = REPOSITORY.model_config_path(repository, model)
            model_info = REPOSITORY.model(repository, model)
            field = retrieve(model_info, config_path)
            # Only models that come with a shortlist are exported.
            if field["shortlist"] is not None:
                config.add_model(field)

    default_model_info = REPOSITORY.model(default_repository, default_model)
    config.set_default(default_model_info)
    config.set_verify(args.verify)

    # expanduser() still honours $HOME but falls back to the password
    # database when it is unset, where os.getenv("HOME") would return None.
    config_dir = os.path.join(os.path.expanduser("~"), ".config")
    os.makedirs(config_dir, exist_ok=True)
    ibus_slimt_t8n_config_path = os.path.join(config_dir, "ibus-slimt-t8n.yml")
    config.export(ibus_slimt_t8n_config_path)
    print("Successfully wrote configuration to", ibus_slimt_t8n_config_path)