\title[foss.my 2009]{Software Internationalization (i18n) \emph{with GNU gettext}}
\author [Muhammad Najmi]{Muhammad Najmi Ahmad Zabidi}

\title{Software Internationalization (i18n) with \emph{GNU gettext}}
\author {Muhammad Najmi Ahmad Zabidi}
\date{25th October 2009}
\institute {Department of Computer Science\\
KICT, International Islamic University Malaysia,\\
\begin{center} \texttt{najmi@kict.iiu.edu.my}\\ 
        \footnotesize{\textrm{Created with \LaTeX{} \\}}





\begin{frame}[shrink]{About the speaker}
                \item<1-> KDE Subversion committer and original translator to ms\mbox{\_}MY
                \item<2-> Translation CVS committer for Tuxpaint project
                \item<3-> Occasionally sending translations for GNU ms\mbox{\_}MY (Munsyi Project), led by Sharuzzaman

\onslide <1-> \pgfputat{\pgfxy(5.5,-3.5)}{\pgfbox[top,right]{\includegraphics[scale=0.2]{./kde.png}}}
\onslide <2->\pgfputat{\pgfxy(10.25,-3.5)}{\pgfbox[top,right]{\includegraphics[scale=0.2]{./tux.png}}}
\onslide <3->\pgfputat{\pgfxy(0.5,-3.5)}{\pgfbox[top,right]{\includegraphics[scale=0.17]{./gnu.png}}}


   What internationalization actually is:
   \item Software going global
   \item Software package getting ``world'' acceptance, so people are willing to localize it
   \item Needs i18n support first!

\subsection{Localization vs Internationalization}
\begin{frame}{The differences\ldots}
        \begin{alertblock}{ Localization} 
                \item  Done by translator
                \item It can be done by the user or the developer
                \item Submit to package maintainer
                \item Get credit
                \item Developer
                \item Prepare package
                \item Merge package
                \item Update package
                \item Announce new package
                \item Give proper credit


 \begin{exampleblock}{gettext's features}
\item Part of GNU packages
\item Enables internationalization of software
\item Enables the creation of Portable Object (PO) file

\emph{Portable Object?}

\begin{frame}\frametitle{Gettext features}
        \begin{block}{Why gettext?}
        \item Supports major character encodings \ldots
        \item UTF-8, for example
        \item KDE, GNOME, and SquirrelMail use it
        \item Relatively easy to update and maintain

\section{Task of developer/translator}
\frametitle{What should developers and translators do?}

\section{Flow of the translation process}
\frametitle{Flow of Translation}

\subsection{PO file}
\frametitle{Portable Object (PO) files}

\begin{block}{PO file's features}
\item It's a raw, untranslated file
\item Created automatically using gettext package
\item Ready to be translated

\subsection{MO file}
\frametitle{Machine Object (MO) files}
\begin{alertblock}{MO file's features}
\item Compiled file, derived from PO
\item It's a binary, thus \emph{machine readable}

\subsection{Location of MO file}
\begin{frame}[fragile]{Default \mbox{\&} customized location}
        \item By default Linux refers to
        \item Customized directory
najmi@notre-dame:/var/www$ tree ms
    |-- greetings.mo
    |-- index.mo
    `-- messages.mo

\subsection{Creating MO file}
\begin{frame}{Generating Machine Object file}
                \item Use msgfmt - message format
                \item This will generate a file with the default name, messages.mo, unless you specify another one
                \item You can discard the output by sending it to \texttt{/dev/null} if you just want to check the translation statistics
                \item It can be converted back to a PO file with msgunfmt

\begin{frame}{Intro to locale}
                        \item Locale is the local setting of a particular country, race, venue etc
                        \item For Bahasa Melayu, the assigned locale is ms\mbox{\_}MY, where ms is for Malay, and MY is for Malaysia
                        \item For now, since Indonesia uses ``id'', plain ms is sometimes just fine
                        \item Date format, e.g.\ dd/mm/yyyy, also falls under locale. We also do not have Daylight Saving Time (DST); that is also part of the locale
                        Locale can be simply viewed by typing \texttt{\mbox{\$}locale} 

\begin{frame}{Some stuff on charsets}
                \item Different charsets support different languages and characters
                \item Includes Roman, Arabic, CJK, etc.
                                \item UTF-8, UTF-16, etc.
                \item I don't know much about this; I only know Roman-based and Arabic-based characters


\subsection{Sample of C++ file}
\frametitle{What does the \emph{C++} file look like?}
\begin{frame}[fragile]{Executable output}
$ ./hello
Hello, world!
How are you?



$ export LC_ALL=ms_MY.UTF-8

$ ./hello
Assalamualaikum, dunia!
Awak apa khabar?

        \begin{exampleblock}{About xgettext}
        \item Part of gettext
        \item Extracting translatable strings from source code

                xgettext -d lang lang.php
        \centering{This will create lang.po, with respect to strings in lang.php}

\subsection{Sample of the POT file}
\frametitle{What does the \alert{\emph{raw}} POT file look like?}

\subsection{Sample of the translated PO file}
\frametitle{What does the \alert{\emph{translated}} PO file look like?}

\subsection{gettext in Python}

        \frametitle {python!}
        {\lstinputlisting [language=python]

\subsection{gettext in Python}
        \frametitle {Translatable strings from Python file}

\subsection{gettext in Python}
        \frametitle {Python executable}
$ python piton.py
python mudah
semudah ini

$ export LC_ALL=C

$ python piton.py
python is simple
as simple as this

\subsection{PHP source code}
        \frametitle{Let's see for PHP}
        {\lstinputlisting [language=PHP]
\subsection{PO file generated from the PHP file\ldots}
        \frametitle{Let's see for PHP}
                \item The following is an example of a snipped file --- without the header

\subsection{Ideal PHP code for i18n}
\begin{frame}[fragile]{Avoid confusing translator}
                \item Try to put strings in a minimal line
                \item Avoid separation of sentence


\subsubsection{The ideal code is \ldots}
\begin{frame}[fragile]{Use ``sprintf''}
        \centering{Use the following \ldots}\\
                echo sprintf(_("The settings of %s is working fine"),$language);

        \centering{instead of \ldots}\\

        echo _("The settings of ").$language._(" is working fine");

\subsection{Sample confusing strings}
\begin{frame}[fragile]{Separated into two}

        echo _("The settings of ").$language._(" is working fine");

        \centering{will generate\ldots}\\
        #: lang.php:11
        msgid "The settings of "
        msgstr ""
        #: lang.php:11
        msgid " is working fine"
        msgstr ""

        \centering{which separates sentence into two different strings\ldots}\\

\subsection{The ideal coding}

                echo sprintf(_("The settings of %s is working fine"),$language);
        \centering{since this will generate\ldots}\\
        msgid "The settings of %s is working fine"
        msgstr ""
\subsection{Plural issues}
\begin{frame}[shrink]{What if we have plural noun\ldots}
                \item Plural is when a noun has different forms for singular and plural

                        \item <1-> {\alert{ cat}}
                        \item <2->{\alert{ boy}}
                        \item<3->{\alert{ man}}

                        \item <4->{\alert{ basikal}}
                        \item <5->{\alert{ pensil}}
                        \item <6->{\alert{ komputer}}

                        \item <1->{\alert {cats}}
                        \item <2->{\alert {boys}}
                        \item <3->{\alert {men}}

                        \item <4->{\alert{ basikal-basikal}}
                        \item <5->{\alert {pensil-pensil}}
                        \item <6->{\alert {komputer-komputer}}


\centering{However, as far as I know, the plural issue in the Malay language is a non-trivial issue, so for the sake of easiness (perhaps laziness?) plural = 0}

%\subsection{Plural issues}
\subsection{Using ngettext in PHP}

2 boys are eating
1 cat falls
1 tingkap ditutup
Files are good

\subsection{Using ngettext in PHP}
        \frametitle{Sample PO}


\subsection{Internationalization tools}
\frametitle{Internationalization tools}
\item gted (very recent)
\item html2po / po2html

\subsection{Localization tools}
\frametitle{Localization tools}

\item POedit
\item Kbabel (now lokalize)
\item Vi? Emacs\ldots{} the list may go on indefinitely, up to \emph{n} \mbox{\#} of tools
\item Incoming \ldots wish for PO Live Edit, prototype is webl10n by gandalf
\item Pootle


\section{Route of PO to MO}
        \frametitle{PO to MO journey}

\begin{itemize}[<+-| alert@+>]
    \item Translate
        \tikz[na] \node[coordinate] (n1) {};

            \node[fill=blue!20,anchor=base] (t1)
            {$PO\ file$};
            } ->
            \node[fill=red!20, ellipse,anchor=base] (t2)
            {$MO\ file$};
            } ->
            \node[fill=green!20,anchor=base] (t3)
            {$Working\ Interface$};

\begin{itemize}[<+-| alert@+>]
    \item Compile
        \tikz[na]\node [coordinate] (n2) {};
    \item Invoke
        \tikz[na]\node [coordinate] (n3) {};

        \path[->]<1-> (n1) edge [bend left] (t1);
        \path[->]<2-> (n2) edge [out=0, in=-90] (t2);
        \path[->]<3-> (n3) edge [out=0, in=-90] (t3);

\section{Issues with package generated by gettext}
\begin{alertblock}{What are the issues?}
        \item Software may become bloated
        \item So, the user may not install \emph{everything} in the first place!
        \item Mozilla has its own way\ldots{}
                \item I saw moz2po and po2moz \ldots good news, perhaps?
                \item Filed bug \mbox{\#}501988 seems like good news
        \item I also have problems with UTF-8 on the console\ldots{} garbled characters
                        \item Then, I \texttt{export LC\mbox{\_}ALL=C}, of course I'm not happy with this
                \item But most of the time, it works\ldots

\begin{frame}{gettext rules}


\centering{\textbf{\textit{Questions?}}}\\

\begin{RLtext}hal `indakum as-su'Al?\\

\centering{{\color{red}\texttt {najmi\mbox{\{}at\mbox{\}} kict.iiu.edu.my}}}
