summaryrefslogtreecommitdiff
path: root/durand-dict.el
diff options
context:
space:
mode:
Diffstat (limited to 'durand-dict.el')
-rw-r--r--durand-dict.el1061
1 files changed, 1061 insertions, 0 deletions
diff --git a/durand-dict.el b/durand-dict.el
new file mode 100644
index 0000000..0eb98e4
--- /dev/null
+++ b/durand-dict.el
@@ -0,0 +1,1061 @@
+;;; durand-dict.el --- An Emacs interface to dictionaries -*- lexical-binding: t; -*-
+
+;; Copyright (C) 2025 Jean Sévère Durand
+
+;; Author: Jean Sévère Durand <durand@jsdurand.xyz>
+;; Keywords: convenience, emulations
+
+;; This program is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+
+;; This program is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+;;; Commentary:
+
+;; This aims to provide a little interface to work with dictionaries
+;; of the choice. One can customize the dictionaries used and how we
+;; parse them.
+;;
+;; In summary, this package lets the user send a request to some
+;; specified dictionary website and parse the output into some
+;; customized format, so that Emacs can function as an interface to
+;; multiple dictionaries at once. The supposed benefit is to check
+;; the dictionaries without opening the browser.
+
+;;; Code:
+
+;;;; Requirements
+
+;; structure library
+(require 'cl-lib)
+
+;; parsing library
+(require 'dom)
+
+;; regular expression building library
+(require 'rx)
+
+;; URL fetching library
+(require 'url)
+
+;; obtaining word at point
+(require 'thingatpt)
+
+;; Multilingually reading command
+;;
+;; For some reason mule-cmds does not provide a package, so we simply
+;; ignore errors here.
+(ignore-errors (require 'mule-cmds))
+
+;;;; Convenient key-bindings
+
+(defvar durand-dictionary-keymap (make-sparse-keymap "dictionary")
+ "A keymapp for dictionary-related functionaties.")
+
+(define-key durand-dictionary-keymap (vector ?t) #'dudict-quick-tamil)
+
+(define-key global-map (vector 3 ?d) durand-dictionary-keymap)
+(define-key global-map (vector ?\H-d) durand-dictionary-keymap)
+
+;;;;; A history variable
+
+(defvar dudict-tamil-history nil
+ "History variable for the Tamil search.")
+
+;;;;; Quick search
+
+(defun dudict-quick-tamil (word include-tamilverb)
+ "Quickly search a Tamil WORD.
+Asks the user what word to search.
+
+If the cursor is over a word, that word is offered as the
+default.
+
+If INCLUDE-TAMILVERB is non-nil, also search for the Tamil Verb
+website."
+ (interactive
+ (let ((def (thing-at-point 'word)))
+ (list
+ (read-multilingual-string "Word to search: " def "tamil99")
+ current-prefix-arg)))
+ (let ((methods
+ (cons
+ dudict-search-agarathi
+ (cond (include-tamilverb (list dudict-search-tamilverb))))))
+ (dudict-search methods word #'dudict-tamil-buffer-name)))
+
+;;;; Derived mode
+
+(define-derived-mode dudict-mode org-mode "DUDICT"
+ "Major mode for viewing my custom dictionaries."
+ (setq truncate-lines nil)
+ (view-mode t))
+
+;;;; Framework
+
+;;;;; Struct
+
+(cl-defstruct dudict-search
+ "Provides methods to (optionally) pre-process, to search, to
+parse, and to present the search results to the user."
+ (pre
+ nil
+ :documentation "Preprocessing function, optional"
+ :type function)
+ (search
+ nil
+ :documentation "Searching function, required.
+This should be a function that receives a word and returns the
+URL to fetch."
+ :type function)
+ (parse
+ nil
+ :documentation "Parsing function, required"
+ :type function)
+ (present
+ nil
+ :documentation "Function to print the search result to the user."
+ :type function)
+ (name
+ "dudict"
+ :documentation
+ "Search name that is used to make the presentation buffer."
+ :type string))
+
+;;;;; Main framework function
+;; Main framework function
+(defun dudict-search (search-method word present-fun)
+ "Search for the definition of WORD by means of SEARCH-METHOD.
+SEARCH-METHOD should be a struct `dudict-search' that provides
+methods to pre-process, to search, and to post-process the search
+results. All this does is to integrate various methods provided.
+
+PRESENT-FUN should be a function that accepts the WORD and
+returns the buffer name to contain the search results."
+ (cond
+ ((dudict-search-p search-method)
+ (setq search-method (list search-method)))
+ ((and (listp search-method)
+ (eval
+ (cons
+ 'and
+ (mapcar #'dudict-search-p search-method)))))
+ ((error "Requires a `dudict-search' struct or a list of \
+them, but found %S"
+ search-method)))
+ (cond
+ ((and (functionp present-fun)
+ (equal (func-arity present-fun) (cons 1 1))))
+ ((error "Requires a function with one argument for PRESENT-FUN, \
+but got %S"
+ present-fun)))
+ (let ((present-name (funcall present-fun word))
+ (ls search-method)
+ temp-buffer method)
+ (cond
+ ((setq temp-buffer (get-buffer present-name))
+ ;; already searched
+ (switch-to-buffer temp-buffer))
+ (t
+ ;; must be new buffer
+ (setq temp-buffer (get-buffer-create present-name))
+ (while (consp ls)
+ (setq method (car ls))
+ (setq ls (cdr ls))
+ (let* ((name (dudict-search-name method))
+ (preprocess (dudict-search-pre method))
+ (search (dudict-search-search method))
+ (parse (dudict-search-parse method))
+ (present (dudict-search-present method))
+ temp-buffer parse-result)
+ (cond
+ ((functionp preprocess)
+ (setq word (funcall preprocess word))))
+ (cond
+ ((and
+ (functionp search)
+ (equal (func-arity search) (cons 1 1))))
+ ((error
+ "Requires a function with one argument, but found %S"
+ search)))
+ (setq search (funcall search word))
+ (setq
+ temp-buffer
+ (url-retrieve-synchronously
+ search t t 20))
+ (cond
+ ((functionp parse))
+ (error "Requires a function to parse, but found %S"
+ parse))
+ (with-current-buffer temp-buffer
+ (setq parse-result (funcall parse)))
+ (kill-buffer temp-buffer)
+ (cond
+ ((and (functionp present)
+ (equal (func-arity present) (cons 1 1))))
+ ((error
+ "Requires a function with one argument, but found %S"
+ present)))
+ ;; already created
+ (setq temp-buffer (get-buffer present-name))
+ (with-current-buffer temp-buffer
+ (goto-char (point-max))
+ (newline)
+ (insert "* " name)
+ (newline)
+ (funcall present parse-result))))
+ (switch-to-buffer temp-buffer)
+ (dudict-mode)
+ (goto-char (point-min))))))
+
+;;;; Tamil dictionary buffer name
+
+(defun dudict-tamil-buffer-name (word)
+ "Return the buffer name for holding Tamil WORD searches."
+ (declare (side-effect-free t) (pure t))
+ (concat "தமிழ் - " word))
+
+;;;; agarathi dictionary
+;; For searching the agarathi dictionary, we for now do not need to
+;; preprocess the search term, but in the future it might be useful to
+;; add some preprocessing so that we do not need to enter in the Tamil
+;; script (some sources do not always use the Tamil scripts).
+;;
+;; The search function just queries the website and returns the
+;; buffer.
+;;
+;; The parsing function converts from the Document Object Model to our
+;; custom structs: This is for the ease and the convenience of
+;; presentation.
+;;
+;; Then the present function can iterate through the parse result
+;; easily and present the search result to the user in a
+;; well-structured format that is easy to read and understand.
+;;
+;; The name is surely the string "அகராதி" (अकरादि).
+
+;;;;; Search Struct
+
+(defvar dudict-search-agarathi nil
+ "The search struct that is used to search the agarathi dictionary.")
+
+(setq dudict-search-agarathi
+ (make-dudict-search
+ :name "அகராதி"
+ :pre nil
+ :search (function
+ (lambda (word)
+ (concat "https://agarathi.com/word/" word)))
+ :parse #'dudict-parse-agarathi
+ :present #'dudict-present-agarathi))
+
+;;;;; Short-cut
+
+(defun dudict-search-agarathi (word)
+ "A short-cut that passes `dudict-search-agarathi' and WORD to
+`dudict-search'."
+ (interactive
+ (read-string "Enter search term: " nil 'dudict-tamil-history))
+ (dudict-search
+ dudict-search-agarathi word #'dudict-tamil-buffer-name))
+
+;;;;; Structures of the dictionary website
+
+;;;;;; The entire time line
+(cl-defstruct agarathi-timeline
+ "An entire agarathi search result is called a timeline.
+It contains a list of panels."
+ (panels
+ nil
+ :documentation "The list of panels contained in the timeline."
+ :type list))
+
+;; verifying function
+(defun agarathi-check-valid-timeline (timeline)
+ "Check that an object is a valid timeline structure."
+ (declare (side-effect-free t) (pure t))
+ (and
+ (agarathi-timeline-p timeline)
+ (let ((ls (agarathi-timeline-panels timeline))
+ (result t)
+ temp)
+ (and
+ (listp ls)
+ (progn
+ (while (consp ls)
+ (setq temp (car-safe ls))
+ (setq ls (cdr-safe ls))
+ (cond
+ ((agarathi-check-valid-panel temp))
+ (t
+ (setq result nil)
+ (setq ls nil))))
+ result)))))
+
+;;;;;; A panel
+(cl-defstruct agarathi-panel
+ "A panel contains the search results from some single source.
+It contains a title and a list of descriptions, where each
+description corresponds to a set of definitions, roughly
+speaking."
+ (title
+ ""
+ :documentation "The title of the panel.
+Usually this is the source of the definition. When this is a
+pure advertisement panel, its title would be \"Sponsored Links\"."
+ :type string)
+ (descriptions
+ nil
+ :documentation "The list of descriptions contained in the panel."
+ :type list))
+
+;; verifying function
+(defun agarathi-check-valid-panel (panel)
+ "Check that an object is a valid panel structure."
+ (declare (side-effect-free t) (pure t))
+ (and
+ (agarathi-panel-p panel)
+ (let ((title (agarathi-panel-title panel))
+ (ls (agarathi-panel-descriptions panel))
+ (result t)
+ temp)
+ (and
+ (stringp title)
+ (listp ls)
+ (progn
+ (while (consp ls)
+ (setq temp (car-safe ls))
+ (setq ls (cdr-safe ls))
+ (cond
+ ((agarathi-check-valid-description temp))
+ (t
+ (setq result nil)
+ (setq ls nil))))
+ result)))))
+
+;;;;;; A Description
+(cl-defstruct agarathi-description
+ "A description contains some definitions along with some optional
+examples.
+
+It contains a title along with a list of definitions and a list
+of examples.
+
+Definitions and examples are basically strings."
+ (title
+ ""
+ :documentation "The title of a description.
+This in the convention of the dictionary website means the word
+that this description is describing. Since the dictionary can
+return descriptions that are not exactly equal to the searched
+term, it is necessary to keep this information for every
+description."
+ :type string)
+ (definitions
+ nil
+ :documentation "The list of definitions contained in the description."
+ :type list)
+ (examples
+ nil
+ :documentation "The list of examples contained in the description."
+ :type list))
+
+;; verifying function
+(defun agarathi-check-valid-description (description)
+ "Check that an object is a valid description structure."
+ (declare (side-effect-free t) (pure t))
+ (and
+ (agarathi-description-p description)
+ ;; check definitions
+ (let ((title (agarathi-description-title description))
+ (ls (agarathi-description-definitions description))
+ (result t)
+ temp)
+ (and
+ (stringp title)
+ (listp ls)
+ (progn
+ (while (consp ls)
+ (setq temp (car-safe ls))
+ (setq ls (cdr-safe ls))
+ (cond ((stringp temp)) (t (setq result nil) (setq ls nil))))
+ result)))
+ ;; check examples
+ (let ((ls (agarathi-description-examples description))
+ (result t)
+ temp)
+ (and
+ (listp ls)
+ (progn
+ (while (consp ls)
+ (setq temp (car-safe ls))
+ (setq ls (cdr-safe ls))
+ (cond ((stringp temp)) (t (setq result nil) (setq ls nil))))
+ result)))))
+
+;;;;; main function
+
+(defun dudict-parse-agarathi ()
+ "Parse the search result from the agarathi website.
+The current buffer is assumed to contain the search results already."
+ (let* ((dom (libxml-parse-html-region (point-min) (point-max)))
+ (timeline-ul
+ (car-safe
+ (delq
+ nil
+ (mapcar
+ (lambda (node)
+ (cond ((string= (dom-attr node 'class)
+ "timeline")
+ node)))
+ (dom-by-tag dom 'ul))))))
+ (cond
+ ((null timeline-ul)
+ (message "No timeline is found.")
+ nil)
+ ((dudict-parse-agarathi-timeline timeline-ul)))))
+
+;;;;; Archived code
+;; (defun dudict-search-agarathi (word)
+;; "Request for the definition of Tamil WORD from the dictionary
+;; website agarathi.
+
+;; WORD should be acceptable by agarathi: no processing is done to
+;; convert from some transliteration to the Tamil script."
+;; (let* ((name (concat "dudict - " word))
+;; (buffer-found (get-buffer name))
+;; (buffer
+;; (cond
+;; (buffer-found)
+;; ((url-retrieve-synchronously
+;; (concat "https://agarathi.com/word/" word)
+;; t t 20))))
+;; result titles count)
+;; (cond
+;; ((not buffer-found)
+;; (with-current-buffer buffer
+;; (let* ((dom (libxml-parse-html-region
+;; (point-min) (point-max)))
+;; (timeline-ul (delq
+;; nil
+;; (mapcar
+;; (lambda (node)
+;; (cond ((string= (dom-attr node 'class)
+;; "timeline")
+;; node)))
+;; (dom-by-tag dom 'ul))))
+;; (timelines (delq
+;; nil
+;; (mapcar
+;; (lambda (node)
+;; (let ((class (dom-attr node 'class)))
+;; (cond
+;; ((or
+;; (string= class "timeline-normal")
+;; (string=
+;; class "timeline-inverted"))
+;; node))))
+;; (dom-by-tag timeline-ul 'li))))
+;; (timeline-panels
+;; (mapcar
+;; (lambda (node) (dom-by-class node "timeline-panel"))
+;; timelines))
+;; (timeline-panels
+;; (delq
+;; nil
+;; (mapcar (lambda (node)
+;; (let ((result
+;; (dudict-parse-agarathi-panel node)))
+;; (cond
+;; ((string=
+;; (cadr result)
+;; "Sponsored Links")
+;; nil)
+;; (result))))
+;; timeline-panels))))
+;; (setq result timeline-panels)
+;; (kill-buffer buffer)))
+;; (setq buffer (get-buffer-create name))
+;; (with-current-buffer buffer
+;; (delete-region (point-min) (point-max))
+;; (goto-char (point-min))
+;; (mapc
+;; (lambda (panel)
+;; (setq titles (car panel))
+;; (setq panel (cdr panel))
+;; ;; insert panel source
+;; (insert "* " (car panel))
+;; (setq panel (cdr panel))
+;; (setq count 0)
+;; (mapc
+;; (lambda (def)
+;; (newline)
+;; (insert
+;; (format
+;; "** %s\n\n"
+;; (dom-text (nth count titles))))
+;; (setq count (1+ count))
+;; (mapc
+;; (lambda (def)
+;; (insert
+;; (mapconcat
+;; #'identity
+;; (split-string def "\n" t "\\s-+")
+;; " ")
+;; " ")
+;; ;; (newline)
+;; )
+;; def)
+;; (newline))
+;; panel)
+;; (newline 2))
+;; result))))
+;; (switch-to-buffer buffer)
+;; (dudict-mode)
+;; (goto-char (point-min))))
+
+;;;;; Parse a timeline
+(defun dudict-parse-agarathi-timeline (dom)
+ "Convert a DOM object into a timeline structure.
+See the documentation for the cl-type 'agarathi-timeline' to know more
+about the timeline structure."
+ (let* ((panels (dom-by-class dom "timeline-panel"))
+ (parsed-panels (mapcar #'dudict-parse-agarathi-panel panels))
+ (filtered-panels
+ (delq
+ nil
+ (mapcar
+ (lambda (panel)
+ (cond
+ ((dudict-agarathi-is-ad-panel panel) nil)
+ (panel)))
+ parsed-panels))))
+ (make-agarathi-timeline :panels filtered-panels)))
+
+;;;;; Filter out ads
+(defun dudict-agarathi-is-ad-panel (panel)
+ "If PANEL is a pure advertisement panel, return t, else return
+nil."
+ (declare (side-effect-free t) (pure t))
+ (and
+ ;; just in case
+ (agarathi-panel-p panel)
+ (string= (agarathi-panel-title panel) "Sponsored Links")))
+
+;;;;; Parse a panel
+
+(defun dudict-parse-agarathi-panel (dom)
+ "Convert a DOM object into a panel structure.
+See the documentation for the cl-type 'agarathi-panel' to know more
+about the panel structure."
+ (declare (side-effect-free t) (pure t))
+ (let* ((title
+ (cond
+ ((car-safe (dom-by-class dom "timeline-title")))
+ ("No title found")))
+ (title
+ (cond
+ ((listp title) (dom-texts title))
+ (title)))
+ (timeline-body (car-safe (dom-by-class dom "timeline-body")))
+ (style (cond ((string=
+ (dom-attr timeline-body 'id)
+ "winslow")
+ 'quoted)
+ ('direct)))
+ (descriptions
+ (dudict-parse-agarathi-descriptions timeline-body style)))
+ (make-agarathi-panel
+ :title title
+ :descriptions descriptions)))
+
+;; (defun dudict-parse-agarathi-panel (dom)
+;; "Convert a DOM object into a panel structure.
+;; See the documentation for the cl-type 'agarathi-panel' to know more
+;; about the panel structure."
+;; (let* ((panel (car dom))
+;; (panel-title
+;; (dom-texts (car (dom-by-class panel "timeline-title"))))
+;; (titles (delq
+;; nil
+;; (mapcar
+;; (lambda (node)
+;; (cond ((dom-attr node 'class) nil) (node)))
+;; (dom-by-tag panel 'h3))))
+;; (description
+;; (delq
+;; nil
+;; (mapcar
+;; (lambda (desc)
+;; (mapcar
+;; (lambda (node)
+;; (cond
+;; ((listp node) (dom-texts node))
+;; ((stringp node) node)))
+;; (dom-children desc)))
+;; (dom-by-class panel "description")))))
+;; (cons
+;; titles
+;; (cons panel-title (mapcar #'flatten-list description)))))
+
+;;;;; Parse descriptions
+(defun dudict-parse-agarathi-descriptions (dom style)
+ "Convert a DOM object into a list of description structures in the
+STYLE style.
+
+See the documentation of the function
+`dudict-parse-agarathi-description' for how STYLE is needed.
+
+See the documentation for the cl-type 'agarathi-description' to know
+more about the description structure."
+ (declare (side-effect-free t) (pure t))
+ ;; We walk the children of the document object model.
+ ;;
+ ;; The basic procedure is that, when we encounter a 'h3' tag, the
+ ;; text of that tag is the title of the following 'div' tag, which
+ ;; must have the class 'description'.
+ (let* ((children (dom-children dom))
+ (title "No title found")
+ child description result)
+ (while (consp children)
+ (setq child (car-safe children))
+ (setq children (cdr-safe children))
+ (cond
+ ((not (listp child)))
+ ((eq (dom-tag child) 'h3) (setq title (dom-text child)))
+ ((string= (dom-attr child 'class) "description")
+ (setq description (dudict-parse-agarathi-description
+ child style))
+ ;; Actually the parse result contains only the definitions and
+ ;; examples.
+ (setq description
+ (make-agarathi-description
+ :title title
+ :definitions (car-safe description)
+ :examples (cdr-safe description)))
+ (setq result (cons description result)))))
+ (reverse result)))
+
+;;;;; Parse one description
+(defun dudict-parse-agarathi-description (dom style)
+ "Convert a list of DOM objects into a description structure in the
+STYLE style.
+
+STYLE is either `direct' or `quoted'. Since some dictoinaries
+present the definitions as direct children of the 'description'
+tag, while some present them as the 'blockquote' children, we
+must have this information STYLE to correctly parse the
+descriptions.
+
+Note that the title is not parsed here, but in the function
+`dudict-parse-agarathi-descriptions', so we only return the cons
+of the definitions and examples contained in the description.
+
+See the documentation for the cl-type 'agarathi-description' to
+know more about the description structure."
+ (declare (side-effect-free t) (pure t))
+ (cond
+ ((listp dom))
+ ((error "DOM should be a list, but got %S: %S"
+ (type-of dom) dom)))
+ (cond
+ ((memq style (list 'direct 'quoted)))
+ ((error
+ "STYLE should be either 'direct' or 'quoted', but got %S"
+ style)))
+ (cond
+ ((eq style 'quoted)
+ (setq dom (car-safe (dom-by-tag dom 'blockquote)))))
+ ;; We walk the children of the document object model.
+ ;;
+ ;; Basically, if we encounter an element that is not a 'blockquote'
+ ;; tag, we store all of these texts into a string and later parse
+ ;; them into definitions. When we encounter 'blockquote' tags, we
+ ;; store them into examples instead.
+ (let* ((children (dom-children dom))
+ (temp-string (string))
+ definitions examples child)
+ (while (consp children)
+ (setq child (car-safe children))
+ (setq children (cdr-safe children))
+ (cond
+ ((not
+ (and
+ (listp child)
+ (eq (dom-tag child) 'blockquote)))
+ (setq temp-string
+ (concat temp-string
+ (cond
+ ((listp child) (dom-texts child))
+ ((format "%s" child))))))
+ ((cond
+ ((null definitions)
+ (setq definitions
+ (dudict-parse-agarathi-definitions temp-string))
+ (setq temp-string (string)))
+ ((setq examples (cons (dom-texts child) examples)))))))
+ (cond
+ ((and
+ (not (string-empty-p temp-string))
+ (null definitions))
+ (setq definitions
+ (dudict-parse-agarathi-definitions temp-string))))
+ (setq examples (reverse examples))
+ (cons definitions examples)))
+
+;;;;; Parse into definitions
+(defun dudict-parse-agarathi-definitions (str)
+ "Parse a string STR into a list of definitions.
+For now this just looks for the following format.
+
+SPACE NUMBER DOT SPACE
+
+More precise parsing might added in the future, if such needs
+arise."
+ (declare (side-effect-free t) (pure t))
+ (save-match-data
+ (let* ((len (length str))
+ (old-index 0)
+ (index 0)
+ result)
+ (while (< index len)
+ (cond
+ ((setq index
+ (string-match
+ (rx-to-string
+ (list 'seq
+ 'space
+ (list 'one-or-more 'digit)
+ ?. 'space)
+ t)
+ str index))
+ (setq result
+ (cons
+ (substring-no-properties
+ str old-index index)
+ result))
+ (setq old-index index)
+ (setq index (+ index 4)))
+ ((setq index len)
+ (setq result
+ (cons
+ (substring-no-properties str old-index)
+ result)))))
+ (reverse result))))
+
+;;;;; Insert the parse result into buffer
+(defun dudict-present-agarathi (timeline)
+ "Insert the TIMELINE structure into the current buffer."
+ (cond
+ ((agarathi-timeline-p timeline))
+ ((error "Requires a timeline, but found %S" timeline)))
+ (let* ((panels (agarathi-timeline-panels timeline))
+ panel
+ descriptions description
+ definitions examples)
+ (while (consp panels)
+ (setq panel (car panels))
+ (setq panels (cdr panels))
+ (insert "** " (agarathi-panel-title panel))
+ (newline)
+ (setq descriptions (agarathi-panel-descriptions panel))
+ (while (consp descriptions)
+ (setq description (car descriptions))
+ (setq descriptions (cdr descriptions))
+ (insert
+ "*** "
+ (dudict-trim (agarathi-description-title description)))
+ (newline)
+ (setq definitions
+ (agarathi-description-definitions description))
+ (mapc
+ (lambda (def) (insert "- " (dudict-trim def)) (newline))
+ definitions)
+ (newline)
+ (setq examples
+ (agarathi-description-examples description))
+ (cond
+ (examples
+ (insert "**** Examples")
+ (newline)
+ (mapc
+ (lambda (ex) (insert "+ " (dudict-trim ex)) (newline))
+ examples)))))))
+
+(defun dudict-trim (str)
+ "Replace any block of spaces from STR by one space."
+ (declare (side-effect-free t) (pure t))
+ (replace-regexp-in-string
+ (rx-to-string
+ (list 'seq (list 'one-or-more 'space))
+ t)
+ (string 32)
+ str))
+
+;;;; Tamilverb dictionary
+
+;;;;; Search Struct
+
+(defvar dudict-search-tamilverb nil
+ "The search struct that is used to search the Tamil Verb \
+dictionary.")
+
+(setq dudict-search-tamilverb
+ (make-dudict-search
+ :name "TamilVerb"
+ :pre nil
+ :search (function
+ (lambda (word)
+ (concat "https://tamilverb.com/?q=" word)))
+ :parse #'dudict-parse-tamilverb
+ :present #'dudict-present-tamilverb))
+
+;;;;; Short-cut
+
+(defun dudict-search-tamilverb (word)
+ "A short-cut that passes `dudict-search-tamilverb' and WORD to
+`dudict-search'."
+ (interactive
+ (read-string "Enter search term: " nil 'dudict-tamil-history))
+ (dudict-search
+ dudict-search-tamilverb word #'dudict-tamil-buffer-name))
+
+;;;;; Structures
+
+;;;;;; All entries
+
+(cl-defstruct tamilverb-entries
+ "A list of entries."
+ (ls
+ nil
+ :documentation "The list of contained entries.
+Each entry has the type `tamilverb-entry'."
+ :type list))
+
+(defun tamilverb-check-valid-entries (entries)
+ "Check that an object is a valid entries structure."
+ (declare (side-effect-free t) (pure t))
+ (and
+ (tamilverb-entries-p entries)
+ (let ((ls (tamilverb-entries-ls entries))
+ (result t)
+ temp)
+ (and
+ (listp ls)
+ (progn
+ (while (consp ls)
+ (setq temp (car-safe ls))
+ (setq ls (cdr-safe ls))
+ (cond
+ ((tamilverb-check-valid-entry temp))
+ (t
+ (setq result nil)
+ (setq ls nil))))
+ result)))))
+
+;;;;;; One entry
+
+(cl-defstruct tamilverb-entry
+ "An entry of the definition.
+It contains the headword, which is a short summary of the definition.
+
+Then it contains the verb class, and some \"sub-words\", which
+are alternate (spoken) spellings.
+
+Then follow the detailed definitions.
+
+Finally it contains one usage example and optionally some
+synonyms."
+ (head
+ ""
+ :documentation "A short summary."
+ :type string)
+ (class
+ 0
+ :documentation "The verb class of the verb."
+ :type integer)
+ (transitivity
+ nil
+ :documentation "The transitivity of the verb."
+ :type bool)
+ (def
+ ""
+ :documentation "Detailed definition."
+ :type string)
+ (sub
+ ""
+ :documentation "Spellings"
+ :type string)
+ (ex
+ ""
+ :documentation "A usage example"
+ :type string)
+ (synonyms
+ nil
+ :documentation "Some synonyms"
+ :type list))
+
+(defun tamilverb-check-valid-entry (entry)
+ "Check that an object is a valid entry structure."
+ (declare (side-effect-free t) (pure t))
+ (and
+ (tamilverb-entry-p entry)
+ (let ((head (tamilverb-entry-head entry))
+ (class (tamilverb-entry-class entry))
+ (trans (tamilverb-entry-transitivity entry))
+ (def (tamilverb-entry-def entry))
+ (sub (tamilverb-entry-sub entry))
+ (ex (tamilverb-entry-ex entry))
+ (synonyms (tamilverb-entry-synonyms entry)))
+ (and
+ (stringp head)
+ (integerp class)
+ (or (null trans) (equal trans t))
+ (stringp def)
+ (stringp sub)
+ (stringp ex)
+ (listp synonyms)
+ (eval (cons #'and (mapcar #'stringp synonyms)))))))
+
+;;;;; main function
+
+(defun dudict-parse-tamilverb ()
+ "Parse the search result from the Tamil Verb website.
+The current buffer is assumed to contain the search results already."
+ ;; ;; First re-insert the results, as the Tamil Verb website escapes
+ ;; ;; the characters for some reason.
+ ;; (let ((dom (libxml-parse-html-region (point-min) (point-max))))
+ ;; (delete-region (point-min) (point-max))
+ ;; (dom-print dom))
+ (let* ((dom (libxml-parse-html-region (point-min) (point-max)))
+ (entries (dom-by-tag dom 'entry))
+ (result (make-tamilverb-entries)))
+ (cond
+ ((null entries))
+ ((setq
+ result
+ (make-tamilverb-entries
+ :ls (mapcar #'dudict-parse-tamilverb-entry entries)))))
+ result))
+
+;;;;; Parse one entry
+
+(defun dudict-parse-tamilverb-entry (entry)
+ "Convert ENTRY to a `tamilverb-entry' struct."
+ (declare (side-effect-free t) (pure t))
+ (let* ((head (dom-text (car (dom-by-tag entry 'headword))))
+ (class (dom-text (car (dom-by-tag entry 'verbclass))))
+ (trans (cond
+ ((string-match-p "intr" class) nil)
+ (t)))
+ (class (string-to-number
+ (save-match-data
+ (string-match (rx-to-string (list 'seq 'digit) t)
+ class)
+ (match-string 0 class))))
+ (def (dom-text (car (dom-by-tag entry 'definition))))
+ (sub (dom-texts (car (dom-by-tag entry 'subword))))
+ (ex (dom-texts
+ (car
+ (dom-by-class
+ (car (dom-by-tag entry 'usage))
+ (rx-to-string (list 'seq 'bos "detail" 'eos) t)))))
+ (synonyms (dom-texts
+ (car
+ (dom-by-class
+ (car (dom-by-tag entry 'synonyms))
+ (rx-to-string
+ (list 'seq 'bos "detail" 'eos) t))))))
+ (make-tamilverb-entry
+ :head head
+ :class class
+ :transitivity trans
+ :def def
+ :sub sub
+ :ex ex
+ :synonyms synonyms)))
+
+;;;;; De-escaping function
+
+(defun dudict-de-escape (str)
+ "If STR contains slices of the form \"&#xNNN;\", where NNN are
+digits, replace those slices by UNICODE characters with the
+corresponding UNICODE code point.
+
+Tamil Verb website escapes the Tamil Script in this way, maybe to
+suit some legacy devices?"
+ (declare (side-effect-free t) (pure t))
+ (let ((s str)
+ (regex (rx-to-string
+ (list
+ 'seq "&#x"
+ (list 'group (list 'one-or-more 'hex))
+ ?\;)
+ t)))
+ (save-match-data
+ (while (string-match regex s)
+ (setq
+ s
+ (replace-match
+ (make-string 1 (string-to-number (match-string 1 s) 16))
+ t t s))))
+ s))
+
+;;;;; Insert the parse result
+
+(defun dudict-present-tamilverb (entries)
+ "Insert the entries into the current buffer."
+ (cond
+ ((tamilverb-entries-p entries))
+ ((error "Requires a tamilverb-entries, but found %S" entries)))
+ (let* ((entries (tamilverb-entries-ls entries))
+ entry head class trans def ex syn)
+ (cond
+ ((null entries)
+ (insert "Not found")
+ (newline)))
+ (while (consp entries)
+ (setq entry (car entries))
+ (setq entries (cdr entries))
+ (insert "** " (tamilverb-entry-head entry))
+ (newline)
+ (insert
+ (format "Class: %d" (tamilverb-entry-class entry)))
+ (newline)
+ (insert
+ (format "Transitivity: %S"
+ (tamilverb-entry-transitivity entry)))
+ (newline)
+ (insert
+ (dudict-trim (dudict-de-escape (tamilverb-entry-sub entry))))
+ (newline 2)
+ (insert (tamilverb-entry-def entry))
+ (newline)
+ (insert "*** Example Usage")
+ (newline)
+ (insert
+ (dudict-trim (dudict-de-escape (tamilverb-entry-ex entry))))
+ (newline)
+ (insert "*** Synonyms")
+ (newline)
+ (insert
+ (dudict-trim
+ (dudict-de-escape (tamilverb-entry-synonyms entry))))
+ (newline))))
+
+;;;; Wiktionary search
+
+;; TODO: Take the search from https://en.wiktionary.org/ into the
+;; considerations, as sometimes this is more reliable and includes
+;; more modern usages.
+
+(provide 'durand-dict)
+;;; durand-dict.el ends here