February 2, 2023
By: Kevin

精确统计clojure中的代码, 注释, 空行

  1. babashka脚本
  2. 代码主体

以cloc为代表的代码行数统计工具在统计代码行数的时候比较简单粗暴, 只会把以 ; 开头的行当成注释.

以下的注释类型都不会被统计到, 结果不太准确.

  • 富注释(comment ...)
  • 文档字符串 docstring
  • 行间注释

要准确统计到这些内容, 需要我们的代码统计工具能够解析clojure的代码, 也就是需要个语法解析器.

正好rewrite-clj日渐成熟, 支撑起了一套代码重构、分析的工具链.

稍微做了下尝试, 很简单就实现了一个clojure的精准代码行数统计器.

babashka脚本

babashka已经内置了rewrite-clj, 只需引入specter来做数据结构遍历, 引入doric来绘制控制台的表格

(require '[babashka.deps :as deps])
(deps/add-deps '{:deps {com.rpl/specter {:mvn/version "1.1.4"}
                        doric           {:mvn/version "0.9.0"}}})

生成类似下面的统计

|------+-------+---------------+--------+----------|
| Ext  | Files | Lines of Code | Spaces | Comments |
|------+-------+---------------+--------+----------|
| .clj |     1 |           357 |    208 |      165 |
| ____ | _____ | _____________ | ______ | ________ |
| SUM: |     1 |           357 |    208 |      165 |
|------+-------+---------------+--------+----------|

代码主体

下面把主要的函数贴出来

(require '[clojure.java.io :as io]
         '[clojure.pprint :as pp]
         '[clojure.string :as str]
         '[rewrite-clj.parser :as parser]
         '[rewrite-clj.node :as node]
         '[com.rpl.specter :as specter]
         '[doric.core :as doric])
(import  java.nio.file.FileSystems)

(def clj-matcher
  "Path matcher obtained from the default file system for the glob
   `*.{clj,cljs,cljc}`."
  (let [default-fs (FileSystems/getDefault)
        pattern    "glob:*.{clj,cljs,cljc}"]
    (.getPathMatcher default-fs pattern)))

(defn find-all-clojure-files
  "Walk `dir` recursively and return a lazy seq of the regular files under it
   whose file name is accepted by `clj-matcher`."
  [dir]
  (let [clojure-source? (fn [f]
                          (and (.isFile f) ;; skip directories
                               ;; match on the bare file name, not the full path
                               (.matches clj-matcher (.getFileName (.toPath f)))))]
    (filter clojure-source? (file-seq (io/file dir)))))

(defn doc-strings
  "Return the docstring of `node` as a sequence of lines (one string per
   line), or nil when `node` has no docstring.

   Only list nodes whose head symbol is `def`, `defn`, `defn-`, `defmacro`
   or `ns` are considered. The docstring is looked up as the third
   non-whitespace, non-comment child, so the result does not depend on how
   the form is laid out (same line, next line, extra blank lines, ...).

   For `def` the string only counts as a docstring when another form follows
   it: in `(def x \"text\")` the string is the bound value."
  [node]
  (when (= (:tag node) :list)
    (let [[head _name candidate & more]
          (remove node/whitespace-or-comment? (:children node))
          head-sym (:value head)]
      (when (and (#{'def 'defn 'defn- 'defmacro 'ns} head-sym)
                 (or (not= 'def head-sym) (seq more)))
        ;; Only string nodes carry `:lines`; any other node yields nil here.
        (:lines candidate)))))


(defn src-lins
  "Return the total number of lines in `file`.

   The reader is opened with `with-open` so the underlying file handle is
   closed once the lines have been counted."
  [file]
  (with-open [rdr (io/reader file)]
    ;; `count` is eager, so the lazy line seq is fully consumed before close.
    (count (line-seq rdr))))

(defn src-space-lines
  "Return the number of blank lines (empty or whitespace-only) in `file`.

   The reader is opened with `with-open` so the file handle is closed after
   counting."
  [file]
  (with-open [rdr (io/reader file)]
    (->> (line-seq rdr)
         (filter str/blank?)
         count)))


(defn src-comment-lines
  "Return the number of lines in `file` that contain a `;` anywhere.

   This is a purely textual check: a `;` inside a string or character
   literal is counted as well. The reader is opened with `with-open` so the
   file handle is closed after counting."
  [file]
  (with-open [rdr (io/reader file)]
    (->> (line-seq rdr)
         (filter #(str/includes? % ";"))
         count)))

(defn src-inline-comment-lines
  "Return the number of lines in `file` where code and a comment share the
   same line, for example:
   `(+ 1 1) ;; one plus one is two`

   A line qualifies when its first non-whitespace character is not `;` and a
   `;` appears later on the line. Lines that consist only of a comment are
   not counted. The check is textual, so a `;` inside a string literal is
   counted too. The reader is closed after counting."
  [file]
  (with-open [rdr (io/reader file)]
    (->> (line-seq rdr)
         ;; some non-blank, non-`;` text first, then a `;` somewhere after it
         (filter #(re-find #"^\s*[^\s;].*;" %))
         count)))

(defn src-doc-string-lines
  "Return the total number of docstring lines found in the top-level forms
   of `file`.

   Each top-level node is passed to `doc-strings`; the line counts of the
   docstrings it returns are summed. Nodes without a docstring contribute 0."
  [file]
  (->> file
       slurp
       parser/parse-string-all
       :children
       ;; `doc-strings` yields a falsy value for nodes without a docstring
       (map #(count (or (doc-strings %) [])))
       (reduce + 0)))

(defn rich-comment-node?
  "Truthy when `node` is a list node whose first child has the value
   `comment`, i.e. a `(comment ...)` form."
  [node]
  (if (= :list (node/tag node))
    (let [head (-> node :children first)]
      ;; nil when the list has no first child or that child has no :value
      (when-some [head-value (:value head)]
        (= 'comment head-value)))
    false))

(defn unevaled-node?
  "True when the tag of `node` is `:uneval`, i.e. a `#_(...)` form."
  [node]
  (-> node
      node/tag
      (= :uneval)))

(defn sexpr-to-str
  "Pretty-print the expression `s` and return the printed text as a string."
  [s]
  (let [buffer (java.io.StringWriter.)]
    ;; capture everything written to *out* while pretty-printing
    (binding [*out* buffer]
      (pp/pprint s)
      (str buffer))))

(defn unevaled-lines
  "Return the number of source lines taken up by top-level `#_(...)` forms
   in `file`.

   Each top-level uneval node is rendered back to its source text with
   `node/string` and its lines are counted; the counts are summed. Returns 0
   when the file has no top-level `#_` form. `#_` forms nested inside other
   forms are not inspected."
  [file]
  (->> file
       slurp
       parser/parse-string-all
       :children
       (filter unevaled-node?)
       (map #(-> % node/string str/split-lines count))
       (reduce + 0)))

(defn rich-comment-lines
  "Return the number of source lines taken up by top-level `(comment ...)`
   forms in `file`.

   Each matching node is rendered back to its original source text with
   `node/string`, so the count reflects the lines as written in the file
   rather than a pretty-printed rendering. Returns 0 when the file has no
   top-level `(comment ...)` form."
  [file]
  (->> file
       slurp
       parser/parse-string-all
       :children
       (filter rich-comment-node?)
       (map #(-> % node/string str/split-lines count))
       (reduce + 0)))

(defn metrics
  "Collect all statistics for a single `file` (a `java.io.File`).

   Returns a map with:
     :ext      - the file's own extension including the dot (e.g. \".cljs\"),
                 falling back to \".clj\" when the name has none
     :path     - the file path
     :files    - always 1, so that summing rows yields the number of files
     :lines    - total lines minus blank, comment, docstring, rich-comment
                 and `#_` lines
     :spaces   - blank lines plus rich-comment lines
     :comments - `;` comment lines plus docstring lines"
  [file]
  (let [total-counts  (src-lins file)
        spaces-counts  (src-space-lines file)
        comments-counts (src-comment-lines file)
        doc-strings-count (src-doc-string-lines file)
        rich-comments-count (rich-comment-lines file)
        unevaled-count (unevaled-lines file)
        ;; last dot-suffix of the file name, e.g. "core.cljs" -> ".cljs"
        ext (or (re-find #"\.[^.]+$" (.getName file)) ".clj")]
    {:ext ext
     :path (.getPath file)
     :files 1
     :lines (- total-counts
               spaces-counts
               comments-counts
               doc-strings-count
               rich-comments-count
               unevaled-count)
     ;; NOTE(review): rich-comment lines are reported under :spaces, and `#_`
     ;; lines are subtracted from :lines without being reported in any
     ;; column - confirm this classification is intended.
     :spaces (+ rich-comments-count spaces-counts)
     :comments  (+ comments-counts
                   doc-strings-count)}))


;; Generating ASCII report, 以下用于生成ascii报告

(def columns
  "Column specs for the report table, in display order. Each spec has a
   `:name` and an `:align`, plus a `:title` where the header text is given
   explicitly."
  (mapv (fn [[col-name align title]]
          (cond-> {:name col-name :align align}
            title (assoc :title title)))
        [[:ext      :left]
         [:path     :left  "File"]
         [:files    :right]
         [:lines    :right "Lines of Code"]
         [:spaces   :right]
         [:comments :right]]))

(defn ascii-table
  "Render `rows` with `doric/table`, showing only those entries of `columns`
   whose `:name` is a key of the first row."
  [rows]
  (let [present-keys (set (keys (first rows)))
        visible-cols (filter (fn [col] (contains? present-keys (:name col)))
                             columns)]
    (doric/table visible-cols rows)))

(defn dash-row
  "Build a separator row for `rows`: a map with the keys of the first row,
   each mapped to a string of underscores. The string is as long as the
   widest of: 4, the column name, the column title, and every cell value
   under that key."
  [rows]
  (let [spec-for (fn [k] (first (filter #(= (:name %) k) columns)))
        width-of (fn [k]
                   (let [col         (spec-for k)
                         cell-widths (map #(count (str (get % k))) rows)]
                     (apply max
                            4
                            (count (name (:name col)))
                            (count (get col :title ""))
                            cell-widths)))]
    (into {}
          (map (fn [k] [k (apply str (repeat (width-of k) "_"))]))
          (keys (first rows)))))

(defn map-vals
  "Return a map with the keys of `m`, each associated with `(f v)` where `v`
   is the value `m` holds for that key."
  [f m]
  (persistent!
   (reduce-kv (fn [acc k v]
                (assoc! acc k (f v)))
              (transient {})
              m)))

(defn table-by-ext
  "Render a table with one row per file extension. The per-file maps in
   `fms` are grouped by `:ext`, their numeric fields summed, and `:files`
   set to the group size. Rows are sorted by `:lines` descending, followed
   by a dash row and a \"SUM:\" row with the grand totals."
  [fms]
  (let [summarize (fn [ms]
                    (-> (apply merge-with + (map #(dissoc % :ext :path) ms))
                        (assoc :files (count ms))))
        by-ext    (for [[ext summary] (map-vals summarize (group-by :ext fms))]
                    (assoc summary :ext ext))
        totals    (-> (apply merge-with + (map #(dissoc % :ext) by-ext))
                      (assoc :ext "SUM:"))
        sorted    (sort-by #(get % :lines -1) > by-ext)]
    (ascii-table (concat sorted
                         [(dash-row by-ext)]
                         [totals]))))

(defn table-by-file
  "Render a table with one row per file map in `fms`, sorted by `:lines`
   descending, followed by a dash row and a \"SUM:\" row that sums every
   field except `:ext` and `:path`."
  [fms]
  (let [numeric-only (map #(dissoc % :ext :path) fms)
        totals       (-> (apply merge-with + numeric-only)
                         (assoc :path "SUM:"))
        sorted       (sort-by #(get % :lines -1) > fms)]
    (ascii-table (concat sorted
                         [(dash-row fms)]
                         [totals]))))

(defn print-report
  "Print a report for the metric maps `ms`.

   `opts` is an optional map:
     :info    - function used for normal output (default `println`)
     :warn    - function used for error output (default `println`)
     :by-file - when truthy, print one row per file instead of per extension

   Entries carrying an `:error` are reported through `warn`, merged with the
   error's `ex-data` and with `:error` replaced by its message. All other
   entries are rendered as a table."
  [ms & [opts]]
  (let [info           (get opts :info println)
        warn           (get opts :warn println)
        describe-error (fn [m]
                         (let [ex (:error m)]
                           (merge (ex-data ex)
                                  m
                                  {:error (.getMessage ex)})))
        errs           (map describe-error (filter :error ms))
        fms            (remove :error ms)
        render         (if (:by-file opts) table-by-file table-by-ext)]
    (info "Found" (count ms) "source files.")
    (when (seq errs)
      (warn "Encountered" (count errs) "reader errors:")
      (run! #(warn (pr-str %)) errs))
    (when (seq fms)
      (info "")
      (info (render fms)))))
Tags: clojure babashka