diff options
Diffstat (limited to 'durand-dict.el')
-rw-r--r-- | durand-dict.el | 1061 |
1 files changed, 1061 insertions, 0 deletions
diff --git a/durand-dict.el b/durand-dict.el new file mode 100644 index 0000000..0eb98e4 --- /dev/null +++ b/durand-dict.el @@ -0,0 +1,1061 @@ +;;; durand-dict.el --- An Emacs interface to dictionaries -*- lexical-binding: t; -*- + +;; Copyright (C) 2025 Jean Sévère Durand + +;; Author: Jean Sévère Durand <durand@jsdurand.xyz> +;; Keywords: convenience, emulations + +;; This program is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. + +;; This program is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with this program. If not, see <https://www.gnu.org/licenses/>. + +;;; Commentary: + +;; This aims to provide a little interface to work with dictionaries +;; of the choice. One can customize the dictionaries used and how we +;; parse them. +;; +;; In summary, this package lets the user send a request to some +;; specified dictionary website and parse the output into some +;; customized format, so that Emacs can function as an interface to +;; multiple dictionaries at once. The supposed benefit is to check +;; the dictionaries without opening the browser. + +;;; Code: + +;;;; Requirements + +;; structure library +(require 'cl-lib) + +;; parsing library +(require 'dom) + +;; regular expression building library +(require 'rx) + +;; URL fetching library +(require 'url) + +;; obtaining word at point +(require 'thingatpt) + +;; Multilingually reading command +;; +;; For some reason mule-cmds does not provide a package, so we simply +;; ignore errors here. +(ignore-errors (require 'mule-cmds)) + +;;;; Convenient key-bindings + +(defvar durand-dictionary-keymap (make-sparse-keymap "dictionary") + "A keymapp for dictionary-related functionaties.") + +(define-key durand-dictionary-keymap (vector ?t) #'dudict-quick-tamil) + +(define-key global-map (vector 3 ?d) durand-dictionary-keymap) +(define-key global-map (vector ?\H-d) durand-dictionary-keymap) + +;;;;; A history variable + +(defvar dudict-tamil-history nil + "History variable for the Tamil search.") + +;;;;; Quick search + +(defun dudict-quick-tamil (word include-tamilverb) + "Quickly search a Tamil WORD. +Asks the user what word to search. + +If the cursor is over a word, that word is offered as the +default. + +If INCLUDE-TAMILVERB is non-nil, also search for the Tamil Verb +website." + (interactive + (let ((def (thing-at-point 'word))) + (list + (read-multilingual-string "Word to search: " def "tamil99") + current-prefix-arg))) + (let ((methods + (cons + dudict-search-agarathi + (cond (include-tamilverb (list dudict-search-tamilverb)))))) + (dudict-search methods word #'dudict-tamil-buffer-name))) + +;;;; Derived mode + +(define-derived-mode dudict-mode org-mode "DUDICT" + "Major mode for viewing my custom dictionaries." + (setq truncate-lines nil) + (view-mode t)) + +;;;; Framework + +;;;;; Struct + +(cl-defstruct dudict-search + "Provides methods to (optionally) pre-process, to search, to +parse, and to present the search results to the user." + (pre + nil + :documentation "Preprocessing function, optional" + :type function) + (search + nil + :documentation "Searching function, required. +This should be a function that receives a word and returns the +URL to fetch." + :type function) + (parse + nil + :documentation "Parsing function, required" + :type function) + (present + nil + :documentation "Function to print the search result to the user." + :type function) + (name + "dudict" + :documentation + "Search name that is used to make the presentation buffer." + :type string)) + +;;;;; Main framework function +;; Main framework function +(defun dudict-search (search-method word present-fun) + "Search for the definition of WORD by means of SEARCH-METHOD. +SEARCH-METHOD should be a struct `dudict-search' that provides +methods to pre-process, to search, and to post-process the search +results. All this does is to integrate various methods provided. + +PRESENT-FUN should be a function that accepts the WORD and +returns the buffer name to contain the search results." + (cond + ((dudict-search-p search-method) + (setq search-method (list search-method))) + ((and (listp search-method) + (eval + (cons + 'and + (mapcar #'dudict-search-p search-method))))) + ((error "Requires a `dudict-search' struct or a list of \ +them, but found %S" + search-method))) + (cond + ((and (functionp present-fun) + (equal (func-arity present-fun) (cons 1 1)))) + ((error "Requires a function with one argument for PRESENT-FUN, \ +but got %S" + present-fun))) + (let ((present-name (funcall present-fun word)) + (ls search-method) + temp-buffer method) + (cond + ((setq temp-buffer (get-buffer present-name)) + ;; already searched + (switch-to-buffer temp-buffer)) + (t + ;; must be new buffer + (setq temp-buffer (get-buffer-create present-name)) + (while (consp ls) + (setq method (car ls)) + (setq ls (cdr ls)) + (let* ((name (dudict-search-name method)) + (preprocess (dudict-search-pre method)) + (search (dudict-search-search method)) + (parse (dudict-search-parse method)) + (present (dudict-search-present method)) + temp-buffer parse-result) + (cond + ((functionp preprocess) + (setq word (funcall preprocess word)))) + (cond + ((and + (functionp search) + (equal (func-arity search) (cons 1 1)))) + ((error + "Requires a function with one argument, but found %S" + search))) + (setq search (funcall search word)) + (setq + temp-buffer + (url-retrieve-synchronously + search t t 20)) + (cond + ((functionp parse)) + (error "Requires a function to parse, but found %S" + parse)) + (with-current-buffer temp-buffer + (setq parse-result (funcall parse))) + (kill-buffer temp-buffer) + (cond + ((and (functionp present) + (equal (func-arity present) (cons 1 1)))) + ((error + "Requires a function with one argument, but found %S" + present))) + ;; already created + (setq temp-buffer (get-buffer present-name)) + (with-current-buffer temp-buffer + (goto-char (point-max)) + (newline) + (insert "* " name) + (newline) + (funcall present parse-result)))) + (switch-to-buffer temp-buffer) + (dudict-mode) + (goto-char (point-min)))))) + +;;;; Tamil dictionary buffer name + +(defun dudict-tamil-buffer-name (word) + "Return the buffer name for holding Tamil WORD searches." + (declare (side-effect-free t) (pure t)) + (concat "தமிழ் - " word)) + +;;;; agarathi dictionary +;; For searching the agarathi dictionary, we for now do not need to +;; preprocess the search term, but in the future it might be useful to +;; add some preprocessing so that we do not need to enter in the Tamil +;; script (some sources do not always use the Tamil scripts). +;; +;; The search function just queries the website and returns the +;; buffer. +;; +;; The parsing function converts from the Document Object Model to our +;; custom structs: This is for the ease and the convenience of +;; presentation. +;; +;; Then the present function can iterate through the parse result +;; easily and present the search result to the user in a +;; well-structured format that is easy to read and understand. +;; +;; The name is surely the string "அகராதி" (अकरादि). + +;;;;; Search Struct + +(defvar dudict-search-agarathi nil + "The search struct that is used to search the agarathi dictionary.") + +(setq dudict-search-agarathi + (make-dudict-search + :name "அகராதி" + :pre nil + :search (function + (lambda (word) + (concat "https://agarathi.com/word/" word))) + :parse #'dudict-parse-agarathi + :present #'dudict-present-agarathi)) + +;;;;; Short-cut + +(defun dudict-search-agarathi (word) + "A short-cut that passes `dudict-search-agarathi' and WORD to +`dudict-search'." + (interactive + (read-string "Enter search term: " nil 'dudict-tamil-history)) + (dudict-search + dudict-search-agarathi word #'dudict-tamil-buffer-name)) + +;;;;; Structures of the dictionary website + +;;;;;; The entire time line +(cl-defstruct agarathi-timeline + "An entire agarathi search result is called a timeline. +It contains a list of panels." + (panels + nil + :documentation "The list of panels contained in the timeline." + :type list)) + +;; verifying function +(defun agarathi-check-valid-timeline (timeline) + "Check that an object is a valid timeline structure." + (declare (side-effect-free t) (pure t)) + (and + (agarathi-timeline-p timeline) + (let ((ls (agarathi-timeline-panels timeline)) + (result t) + temp) + (and + (listp ls) + (progn + (while (consp ls) + (setq temp (car-safe ls)) + (setq ls (cdr-safe ls)) + (cond + ((agarathi-check-valid-panel temp)) + (t + (setq result nil) + (setq ls nil)))) + result))))) + +;;;;;; A panel +(cl-defstruct agarathi-panel + "A panel contains the search results from some single source. +It contains a title and a list of descriptions, where each +description corresponds to a set of definitions, roughly +speaking." + (title + "" + :documentation "The title of the panel. +Usually this is the source of the definition. When this is a +pure advertisement panel, its title would be \"Sponsored Links\"." + :type string) + (descriptions + nil + :documentation "The list of descriptions contained in the panel." + :type list)) + +;; verifying function +(defun agarathi-check-valid-panel (panel) + "Check that an object is a valid panel structure." + (declare (side-effect-free t) (pure t)) + (and + (agarathi-panel-p panel) + (let ((title (agarathi-panel-title panel)) + (ls (agarathi-panel-descriptions panel)) + (result t) + temp) + (and + (stringp title) + (listp ls) + (progn + (while (consp ls) + (setq temp (car-safe ls)) + (setq ls (cdr-safe ls)) + (cond + ((agarathi-check-valid-description temp)) + (t + (setq result nil) + (setq ls nil)))) + result))))) + +;;;;;; A Description +(cl-defstruct agarathi-description + "A description contains some definitions along with some optional +examples. + +It contains a title along with a list of definitions and a list +of examples. + +Definitions and examples are basically strings." + (title + "" + :documentation "The title of a description. +This in the convention of the dictionary website means the word +that this description is describing. Since the dictionary can +return descriptions that are not exactly equal to the searched +term, it is necessary to keep this information for every +description." + :type string) + (definitions + nil + :documentation "The list of definitions contained in the description." + :type list) + (examples + nil + :documentation "The list of examples contained in the description." + :type list)) + +;; verifying function +(defun agarathi-check-valid-description (description) + "Check that an object is a valid description structure." + (declare (side-effect-free t) (pure t)) + (and + (agarathi-description-p description) + ;; check definitions + (let ((title (agarathi-description-title description)) + (ls (agarathi-description-definitions description)) + (result t) + temp) + (and + (stringp title) + (listp ls) + (progn + (while (consp ls) + (setq temp (car-safe ls)) + (setq ls (cdr-safe ls)) + (cond ((stringp temp)) (t (setq result nil) (setq ls nil)))) + result))) + ;; check examples + (let ((ls (agarathi-description-examples description)) + (result t) + temp) + (and + (listp ls) + (progn + (while (consp ls) + (setq temp (car-safe ls)) + (setq ls (cdr-safe ls)) + (cond ((stringp temp)) (t (setq result nil) (setq ls nil)))) + result))))) + +;;;;; main function + +(defun dudict-parse-agarathi () + "Parse the search result from the agarathi website. +The current buffer is assumed to contain the search results already." + (let* ((dom (libxml-parse-html-region (point-min) (point-max))) + (timeline-ul + (car-safe + (delq + nil + (mapcar + (lambda (node) + (cond ((string= (dom-attr node 'class) + "timeline") + node))) + (dom-by-tag dom 'ul)))))) + (cond + ((null timeline-ul) + (message "No timeline is found.") + nil) + ((dudict-parse-agarathi-timeline timeline-ul))))) + +;;;;; Archived code +;; (defun dudict-search-agarathi (word) +;; "Request for the definition of Tamil WORD from the dictionary +;; website agarathi. + +;; WORD should be acceptable by agarathi: no processing is done to +;; convert from some transliteration to the Tamil script." +;; (let* ((name (concat "dudict - " word)) +;; (buffer-found (get-buffer name)) +;; (buffer +;; (cond +;; (buffer-found) +;; ((url-retrieve-synchronously +;; (concat "https://agarathi.com/word/" word) +;; t t 20)))) +;; result titles count) +;; (cond +;; ((not buffer-found) +;; (with-current-buffer buffer +;; (let* ((dom (libxml-parse-html-region +;; (point-min) (point-max))) +;; (timeline-ul (delq +;; nil +;; (mapcar +;; (lambda (node) +;; (cond ((string= (dom-attr node 'class) +;; "timeline") +;; node))) +;; (dom-by-tag dom 'ul)))) +;; (timelines (delq +;; nil +;; (mapcar +;; (lambda (node) +;; (let ((class (dom-attr node 'class))) +;; (cond +;; ((or +;; (string= class "timeline-normal") +;; (string= +;; class "timeline-inverted")) +;; node)))) +;; (dom-by-tag timeline-ul 'li)))) +;; (timeline-panels +;; (mapcar +;; (lambda (node) (dom-by-class node "timeline-panel")) +;; timelines)) +;; (timeline-panels +;; (delq +;; nil +;; (mapcar (lambda (node) +;; (let ((result +;; (dudict-parse-agarathi-panel node))) +;; (cond +;; ((string= +;; (cadr result) +;; "Sponsored Links") +;; nil) +;; (result)))) +;; timeline-panels)))) +;; (setq result timeline-panels) +;; (kill-buffer buffer))) +;; (setq buffer (get-buffer-create name)) +;; (with-current-buffer buffer +;; (delete-region (point-min) (point-max)) +;; (goto-char (point-min)) +;; (mapc +;; (lambda (panel) +;; (setq titles (car panel)) +;; (setq panel (cdr panel)) +;; ;; insert panel source +;; (insert "* " (car panel)) +;; (setq panel (cdr panel)) +;; (setq count 0) +;; (mapc +;; (lambda (def) +;; (newline) +;; (insert +;; (format +;; "** %s\n\n" +;; (dom-text (nth count titles)))) +;; (setq count (1+ count)) +;; (mapc +;; (lambda (def) +;; (insert +;; (mapconcat +;; #'identity +;; (split-string def "\n" t "\\s-+") +;; " ") +;; " ") +;; ;; (newline) +;; ) +;; def) +;; (newline)) +;; panel) +;; (newline 2)) +;; result)))) +;; (switch-to-buffer buffer) +;; (dudict-mode) +;; (goto-char (point-min)))) + +;;;;; Parse a timeline +(defun dudict-parse-agarathi-timeline (dom) + "Convert a DOM object into a timeline structure. +See the documentation for the cl-type 'agarathi-timeline' to know more +about the timeline structure." + (let* ((panels (dom-by-class dom "timeline-panel")) + (parsed-panels (mapcar #'dudict-parse-agarathi-panel panels)) + (filtered-panels + (delq + nil + (mapcar + (lambda (panel) + (cond + ((dudict-agarathi-is-ad-panel panel) nil) + (panel))) + parsed-panels)))) + (make-agarathi-timeline :panels filtered-panels))) + +;;;;; Filter out ads +(defun dudict-agarathi-is-ad-panel (panel) + "If PANEL is a pure advertisement panel, return t, else return +nil." + (declare (side-effect-free t) (pure t)) + (and + ;; just in case + (agarathi-panel-p panel) + (string= (agarathi-panel-title panel) "Sponsored Links"))) + +;;;;; Parse a panel + +(defun dudict-parse-agarathi-panel (dom) + "Convert a DOM object into a panel structure. +See the documentation for the cl-type 'agarathi-panel' to know more +about the panel structure." + (declare (side-effect-free t) (pure t)) + (let* ((title + (cond + ((car-safe (dom-by-class dom "timeline-title"))) + ("No title found"))) + (title + (cond + ((listp title) (dom-texts title)) + (title))) + (timeline-body (car-safe (dom-by-class dom "timeline-body"))) + (style (cond ((string= + (dom-attr timeline-body 'id) + "winslow") + 'quoted) + ('direct))) + (descriptions + (dudict-parse-agarathi-descriptions timeline-body style))) + (make-agarathi-panel + :title title + :descriptions descriptions))) + +;; (defun dudict-parse-agarathi-panel (dom) +;; "Convert a DOM object into a panel structure. +;; See the documentation for the cl-type 'agarathi-panel' to know more +;; about the panel structure." +;; (let* ((panel (car dom)) +;; (panel-title +;; (dom-texts (car (dom-by-class panel "timeline-title")))) +;; (titles (delq +;; nil +;; (mapcar +;; (lambda (node) +;; (cond ((dom-attr node 'class) nil) (node))) +;; (dom-by-tag panel 'h3)))) +;; (description +;; (delq +;; nil +;; (mapcar +;; (lambda (desc) +;; (mapcar +;; (lambda (node) +;; (cond +;; ((listp node) (dom-texts node)) +;; ((stringp node) node))) +;; (dom-children desc))) +;; (dom-by-class panel "description"))))) +;; (cons +;; titles +;; (cons panel-title (mapcar #'flatten-list description))))) + +;;;;; Parse descriptions +(defun dudict-parse-agarathi-descriptions (dom style) + "Convert a DOM object into a list of description structures in the +STYLE style. + +See the documentation of the function +`dudict-parse-agarathi-description' for how STYLE is needed. + +See the documentation for the cl-type 'agarathi-description' to know +more about the description structure." + (declare (side-effect-free t) (pure t)) + ;; We walk the children of the document object model. + ;; + ;; The basic procedure is that, when we encounter a 'h3' tag, the + ;; text of that tag is the title of the following 'div' tag, which + ;; must have the class 'description'. + (let* ((children (dom-children dom)) + (title "No title found") + child description result) + (while (consp children) + (setq child (car-safe children)) + (setq children (cdr-safe children)) + (cond + ((not (listp child))) + ((eq (dom-tag child) 'h3) (setq title (dom-text child))) + ((string= (dom-attr child 'class) "description") + (setq description (dudict-parse-agarathi-description + child style)) + ;; Actually the parse result contains only the definitions and + ;; examples. + (setq description + (make-agarathi-description + :title title + :definitions (car-safe description) + :examples (cdr-safe description))) + (setq result (cons description result))))) + (reverse result))) + +;;;;; Parse one description +(defun dudict-parse-agarathi-description (dom style) + "Convert a list of DOM objects into a description structure in the +STYLE style. + +STYLE is either `direct' or `quoted'. Since some dictoinaries +present the definitions as direct children of the 'description' +tag, while some present them as the 'blockquote' children, we +must have this information STYLE to correctly parse the +descriptions. + +Note that the title is not parsed here, but in the function +`dudict-parse-agarathi-descriptions', so we only return the cons +of the definitions and examples contained in the description. + +See the documentation for the cl-type 'agarathi-description' to +know more about the description structure." + (declare (side-effect-free t) (pure t)) + (cond + ((listp dom)) + ((error "DOM should be a list, but got %S: %S" + (type-of dom) dom))) + (cond + ((memq style (list 'direct 'quoted))) + ((error + "STYLE should be either 'direct' or 'quoted', but got %S" + style))) + (cond + ((eq style 'quoted) + (setq dom (car-safe (dom-by-tag dom 'blockquote))))) + ;; We walk the children of the document object model. + ;; + ;; Basically, if we encounter an element that is not a 'blockquote' + ;; tag, we store all of these texts into a string and later parse + ;; them into definitions. When we encounter 'blockquote' tags, we + ;; store them into examples instead. + (let* ((children (dom-children dom)) + (temp-string (string)) + definitions examples child) + (while (consp children) + (setq child (car-safe children)) + (setq children (cdr-safe children)) + (cond + ((not + (and + (listp child) + (eq (dom-tag child) 'blockquote))) + (setq temp-string + (concat temp-string + (cond + ((listp child) (dom-texts child)) + ((format "%s" child)))))) + ((cond + ((null definitions) + (setq definitions + (dudict-parse-agarathi-definitions temp-string)) + (setq temp-string (string))) + ((setq examples (cons (dom-texts child) examples))))))) + (cond + ((and + (not (string-empty-p temp-string)) + (null definitions)) + (setq definitions + (dudict-parse-agarathi-definitions temp-string)))) + (setq examples (reverse examples)) + (cons definitions examples))) + +;;;;; Parse into definitions +(defun dudict-parse-agarathi-definitions (str) + "Parse a string STR into a list of definitions. +For now this just looks for the following format. + +SPACE NUMBER DOT SPACE + +More precise parsing might added in the future, if such needs +arise." + (declare (side-effect-free t) (pure t)) + (save-match-data + (let* ((len (length str)) + (old-index 0) + (index 0) + result) + (while (< index len) + (cond + ((setq index + (string-match + (rx-to-string + (list 'seq + 'space + (list 'one-or-more 'digit) + ?. 'space) + t) + str index)) + (setq result + (cons + (substring-no-properties + str old-index index) + result)) + (setq old-index index) + (setq index (+ index 4))) + ((setq index len) + (setq result + (cons + (substring-no-properties str old-index) + result))))) + (reverse result)))) + +;;;;; Insert the parse result into buffer +(defun dudict-present-agarathi (timeline) + "Insert the TIMELINE structure into the current buffer." + (cond + ((agarathi-timeline-p timeline)) + ((error "Requires a timeline, but found %S" timeline))) + (let* ((panels (agarathi-timeline-panels timeline)) + panel + descriptions description + definitions examples) + (while (consp panels) + (setq panel (car panels)) + (setq panels (cdr panels)) + (insert "** " (agarathi-panel-title panel)) + (newline) + (setq descriptions (agarathi-panel-descriptions panel)) + (while (consp descriptions) + (setq description (car descriptions)) + (setq descriptions (cdr descriptions)) + (insert + "*** " + (dudict-trim (agarathi-description-title description))) + (newline) + (setq definitions + (agarathi-description-definitions description)) + (mapc + (lambda (def) (insert "- " (dudict-trim def)) (newline)) + definitions) + (newline) + (setq examples + (agarathi-description-examples description)) + (cond + (examples + (insert "**** Examples") + (newline) + (mapc + (lambda (ex) (insert "+ " (dudict-trim ex)) (newline)) + examples))))))) + +(defun dudict-trim (str) + "Replace any block of spaces from STR by one space." + (declare (side-effect-free t) (pure t)) + (replace-regexp-in-string + (rx-to-string + (list 'seq (list 'one-or-more 'space)) + t) + (string 32) + str)) + +;;;; Tamilverb dictionary + +;;;;; Search Struct + +(defvar dudict-search-tamilverb nil + "The search struct that is used to search the Tamil Verb \ +dictionary.") + +(setq dudict-search-tamilverb + (make-dudict-search + :name "TamilVerb" + :pre nil + :search (function + (lambda (word) + (concat "https://tamilverb.com/?q=" word))) + :parse #'dudict-parse-tamilverb + :present #'dudict-present-tamilverb)) + +;;;;; Short-cut + +(defun dudict-search-tamilverb (word) + "A short-cut that passes `dudict-search-tamilverb' and WORD to +`dudict-search'." + (interactive + (read-string "Enter search term: " nil 'dudict-tamil-history)) + (dudict-search + dudict-search-tamilverb word #'dudict-tamil-buffer-name)) + +;;;;; Structures + +;;;;;; All entries + +(cl-defstruct tamilverb-entries + "A list of entries." + (ls + nil + :documentation "The list of contained entries. +Each entry has the type `tamilverb-entry'." + :type list)) + +(defun tamilverb-check-valid-entries (entries) + "Check that an object is a valid entries structure." + (declare (side-effect-free t) (pure t)) + (and + (tamilverb-entries-p entries) + (let ((ls (tamilverb-entries-ls entries)) + (result t) + temp) + (and + (listp ls) + (progn + (while (consp ls) + (setq temp (car-safe ls)) + (setq ls (cdr-safe ls)) + (cond + ((tamilverb-check-valid-entry temp)) + (t + (setq result nil) + (setq ls nil)))) + result))))) + +;;;;;; One entry + +(cl-defstruct tamilverb-entry + "An entry of the definition. +It contains the headword, which is a short summary of the definition. + +Then it contains the verb class, and some \"sub-words\", which +are alternate (spoken) spellings. + +Then follow the detailed definitions. + +Finally it contains one usage example and optionally some +synonyms." + (head + "" + :documentation "A short summary." + :type string) + (class + 0 + :documentation "The verb class of the verb." + :type integer) + (transitivity + nil + :documentation "The transitivity of the verb." + :type bool) + (def + "" + :documentation "Detailed definition." + :type string) + (sub + "" + :documentation "Spellings" + :type string) + (ex + "" + :documentation "A usage example" + :type string) + (synonyms + nil + :documentation "Some synonyms" + :type list)) + +(defun tamilverb-check-valid-entry (entry) + "Check that an object is a valid entry structure." + (declare (side-effect-free t) (pure t)) + (and + (tamilverb-entry-p entry) + (let ((head (tamilverb-entry-head entry)) + (class (tamilverb-entry-class entry)) + (trans (tamilverb-entry-transitivity entry)) + (def (tamilverb-entry-def entry)) + (sub (tamilverb-entry-sub entry)) + (ex (tamilverb-entry-ex entry)) + (synonyms (tamilverb-entry-synonyms entry))) + (and + (stringp head) + (integerp class) + (or (null trans) (equal trans t)) + (stringp def) + (stringp sub) + (stringp ex) + (listp synonyms) + (eval (cons #'and (mapcar #'stringp synonyms))))))) + +;;;;; main function + +(defun dudict-parse-tamilverb () + "Parse the search result from the Tamil Verb website. +The current buffer is assumed to contain the search results already." + ;; ;; First re-insert the results, as the Tamil Verb website escapes + ;; ;; the characters for some reason. + ;; (let ((dom (libxml-parse-html-region (point-min) (point-max)))) + ;; (delete-region (point-min) (point-max)) + ;; (dom-print dom)) + (let* ((dom (libxml-parse-html-region (point-min) (point-max))) + (entries (dom-by-tag dom 'entry)) + (result (make-tamilverb-entries))) + (cond + ((null entries)) + ((setq + result + (make-tamilverb-entries + :ls (mapcar #'dudict-parse-tamilverb-entry entries))))) + result)) + +;;;;; Parse one entry + +(defun dudict-parse-tamilverb-entry (entry) + "Convert ENTRY to a `tamilverb-entry' struct." + (declare (side-effect-free t) (pure t)) + (let* ((head (dom-text (car (dom-by-tag entry 'headword)))) + (class (dom-text (car (dom-by-tag entry 'verbclass)))) + (trans (cond + ((string-match-p "intr" class) nil) + (t))) + (class (string-to-number + (save-match-data + (string-match (rx-to-string (list 'seq 'digit) t) + class) + (match-string 0 class)))) + (def (dom-text (car (dom-by-tag entry 'definition)))) + (sub (dom-texts (car (dom-by-tag entry 'subword)))) + (ex (dom-texts + (car + (dom-by-class + (car (dom-by-tag entry 'usage)) + (rx-to-string (list 'seq 'bos "detail" 'eos) t))))) + (synonyms (dom-texts + (car + (dom-by-class + (car (dom-by-tag entry 'synonyms)) + (rx-to-string + (list 'seq 'bos "detail" 'eos) t)))))) + (make-tamilverb-entry + :head head + :class class + :transitivity trans + :def def + :sub sub + :ex ex + :synonyms synonyms))) + +;;;;; De-escaping function + +(defun dudict-de-escape (str) + "If STR contains slices of the form \"&#xNNN;\", where NNN are +digits, replace those slices by UNICODE characters with the +corresponding UNICODE code point. + +Tamil Verb website escapes the Tamil Script in this way, maybe to +suit some legacy devices?" + (declare (side-effect-free t) (pure t)) + (let ((s str) + (regex (rx-to-string + (list + 'seq "&#x" + (list 'group (list 'one-or-more 'hex)) + ?\;) + t))) + (save-match-data + (while (string-match regex s) + (setq + s + (replace-match + (make-string 1 (string-to-number (match-string 1 s) 16)) + t t s)))) + s)) + +;;;;; Insert the parse result + +(defun dudict-present-tamilverb (entries) + "Insert the entries into the current buffer." + (cond + ((tamilverb-entries-p entries)) + ((error "Requires a tamilverb-entries, but found %S" entries))) + (let* ((entries (tamilverb-entries-ls entries)) + entry head class trans def ex syn) + (cond + ((null entries) + (insert "Not found") + (newline))) + (while (consp entries) + (setq entry (car entries)) + (setq entries (cdr entries)) + (insert "** " (tamilverb-entry-head entry)) + (newline) + (insert + (format "Class: %d" (tamilverb-entry-class entry))) + (newline) + (insert + (format "Transitivity: %S" + (tamilverb-entry-transitivity entry))) + (newline) + (insert + (dudict-trim (dudict-de-escape (tamilverb-entry-sub entry)))) + (newline 2) + (insert (tamilverb-entry-def entry)) + (newline) + (insert "*** Example Usage") + (newline) + (insert + (dudict-trim (dudict-de-escape (tamilverb-entry-ex entry)))) + (newline) + (insert "*** Synonyms") + (newline) + (insert + (dudict-trim + (dudict-de-escape (tamilverb-entry-synonyms entry)))) + (newline)))) + +;;;; Wiktionary search + +;; TODO: Take the search from https://en.wiktionary.org/ into the +;; considerations, as sometimes this is more reliable and includes +;; more modern usages. + +(provide 'durand-dict) +;;; durand-dict.el ends here |