Source code for jyotisha.custom_transliteration

#!/usr/bin/python3
#  -*- coding: utf-8 -*-

import logging
import re

from indic_transliteration import sanscript, language_code_to_script

# Root-logger setup performed at import time: DEBUG threshold, and every record
# is rendered with its level, timestamp and originating file/line.
logging.basicConfig(
  format="%(levelname)s: %(asctime)s {%(filename)s:%(lineno)d}: %(message)s ",
  level=logging.DEBUG,
)


# Single-character substitutions used by romanise(). Only lower-case keys are
# listed because the input is lower-cased *before* the table is applied.
# 'ḥ' is dropped entirely and a space becomes a hyphen.
_ROMANISE_TABLE = str.maketrans({
  'ā': 'a', 'ī': 'i', 'ū': 'u', 'ê': 'e',
  'ṅ': 'n', 'ṇ': 'n', 'ñ': 'n', 'ṉ': 'n',
  'ḍ': 'd', 'ṭ': 't', 'ṃ': 'm',
  'ṛ': 'ri', 'ś': 'sh', 'ṣ': 'sh',
  'ḥ': '', ' ': '-',
})


def romanise(iast_text):
  """Return a lower-case, diacritic-free rendering of *iast_text*.

  Each character listed in ``_ROMANISE_TABLE`` is replaced by its plain
  substitute (for example 'ś' -> 'sh', 'ṛ' -> 'ri'), 'ḥ' is removed, spaces
  are turned into hyphens, and every other character is kept as is.

  The text is lower-cased first and translated afterwards. Doing it in the
  opposite order let capital letters that were missing from the table
  (e.g. 'Ī', 'Ū', 'Ṭ') be lower-cased into 'ī', 'ū', 'ṭ' after the
  substitution pass, so their diacritics leaked into the result.

  Args:
    iast_text: The string to romanise.

  Returns:
    The romanised, lower-cased string.
  """
  return iast_text.lower().translate(_ROMANISE_TABLE)
def tr(text, script, titled=True, source_script=sanscript.roman.HK_DRAVIDIAN):
  """Transliterate *text* from *source_script* into *script*, with a few fix-ups.

  Steps, in order:

  * A target of 'hk' or 'itrans' is replaced by
    ``sanscript.roman.HK_DRAVIDIAN`` and title-casing is switched off.
  * An empty *text* is returned as '' without calling the transliterator.
  * Every '~' is wrapped in '##' toggler markers so that it is not
    transliterated.
  * In the transliterated result 'C' becomes 'Ch' and 'c' becomes 'ch'.
  * If *titled* is true, ``str.title()`` is applied.
  * Script-specific clean-ups follow for Tamil, IAST/ISO and Telugu targets.

  Args:
    text: The string to transliterate.
    script: Target script name; must support ``startswith``.
    titled: Whether to title-case the result (forced to False for
      'hk'/'itrans').
    source_script: Script that *text* is written in.

  Returns:
    The transliterated string, or '' when *text* is empty.

  NOTE: Please don't put your custom tex/ md/ ics whatever code here and pollute core library functions. Wrap this in your own functions if you must. Functions should be atomic."""
  if script in ['hk', 'itrans']:
    script = sanscript.roman.HK_DRAVIDIAN
    titled = False
  if text == '':
    return ''
  # TODO: Fix this ugliness.
  t = text.replace('~', '##~##')  # Simple fix to prevent transliteration of ~
  transliterated_text = sanscript.transliterate(data=t, _from=source_script, _to=script, togglers={'##'}).replace('C', 'Ch').replace('c', 'ch')
  if titled:
    transliterated_text = transliterated_text.title()
  if script == sanscript.TAMIL:
    transliterated_text = sanscript.SCHEMES[sanscript.TAMIL].apply_roman_numerals(transliterated_text)
    # transliterated_text = clean_tamil_Na(transliterated_text)
  if script.startswith('iast') or script.startswith('iso'):
    transliterated_text = transliterated_text.replace('ṉ', 'n')
    # Lower-case whatever character directly follows each listed accent mark.
    # Presumably this undoes str.title() capitalising the letter after a
    # combining mark - TODO confirm.
    # NOTE(review): this loop is placed under the iast/iso branch; the nesting
    # was reconstructed from a flattened listing - confirm against the repo.
    for accent_char in ['◌̥', '̂', '̥']:
      for _match in re.findall(accent_char + '.', transliterated_text):
        transliterated_text = transliterated_text.replace(_match, _match.lower())
  if script == 'telugu' or script == sanscript.TELUGU:
    # Swap the Devanagari letter 'ऩ' for the Telugu letter 'న' in Telugu output.
    transliterated_text = transliterated_text.replace('ऩ', 'న')
  return transliterated_text
def sexastr2deci(sexa_str):
  """Convert a sexagesimal string to its decimal value.

  The components are separated by colons; the i-th component (counting from
  zero) is divided by 60**i and the results are added up. A leading '-'
  negates the whole value, so '-0:30' is -0.5.

  Surrounding whitespace is ignored. Without that, a value such as
  ' -15:30:00' would miss the sign check and only its first component would
  be treated as negative.

  Args:
    sexa_str: A string encoding of a sexagesimal value, with the various
      components separated by colons, optionally prefixed with '-'.

  Returns:
    A decimal (float) value corresponding to the sexagesimal string.

  Raises:
    ValueError: If the string is empty (or only whitespace), or if a
      component cannot be parsed by ``float``.

  Examples:
    >>> sexastr2deci('15:30:00')
    15.5
    >>> sexastr2deci('-15:30:45')
    -15.5125
  """
  sexa_str = sexa_str.strip()
  if not sexa_str:
    raise ValueError('empty sexagesimal string')
  sign = 1.0
  if sexa_str.startswith('-'):
    sign = -1.0
    sexa_str = sexa_str[1:]
  # Accumulate left to right with plain additions (rather than sum()) so the
  # floating-point result does not depend on the interpreter's summation method.
  decival = 0.0
  for power, component in enumerate(sexa_str.split(':')):
    decival += float(component) / (60.0 ** power)
  return decival * sign
def clean_tamil_Na(text):
  """Normalise the Tamil letters 'ந' and 'ன' in *text* with three regex passes.

  The passes run in order, each on the output of the previous one:

  1. 'ந' preceded by any character other than a space becomes 'ன'.
  2. 'ன' directly after '-', '—' or '*' becomes 'ந' again.
  3. The sequence 'ன்த' becomes 'ந்த'.

  Args:
    text: The string to clean.

  Returns:
    The string after all three substitutions.
  """
  passes = (
    (r'([^ ])ந', r'\1ன'),
    (r'([-—*])ன', r'\1ந'),
    (r'ன்த', r'ந்த'),
  )
  cleaned = text
  for pattern, replacement in passes:
    cleaned = re.sub(pattern, replacement, cleaned)
  return cleaned
def transliterate_from_language(text, language, script):
  """Transliterate *text*, written in the given *language*, into *script*.

  For ``language == "ta"`` the work is delegated to ``tr(text, script)``,
  which uses its default source script. For any other language the source
  script is looked up in ``language_code_to_script`` and
  ``sanscript.transliterate`` is called with '##' as the toggler marker.

  Args:
    text: The string to transliterate.
    language: Language code; "ta" is special-cased, anything else is used as
      a key into ``language_code_to_script``.
    script: Target script name.

  Returns:
    The transliterated string.
  """
  if language == "ta":
    # Tamil names are stored in HK (because the latter differentiates between vargIya consonants!)
    transliterated_text = tr(text, script)
  else:
    source_script = language_code_to_script[language]
    transliterated_text = sanscript.transliterate(data=text, _from=source_script, _to=script, togglers={'##'})
    # NOTE(review): this Tamil step is placed inside the else branch (tr()
    # already applies it on the "ta" path); the nesting was reconstructed from
    # a flattened listing - confirm against the repo.
    if script == sanscript.TAMIL:
      transliterated_text = sanscript.SCHEMES[sanscript.TAMIL].apply_roman_numerals(transliterated_text)
  return transliterated_text