February 2, 2023
By: Kevin
精确统计clojure中的代码, 注释, 空行
cloc为代表的代码行数统计工具统计代码行数的时候, 比较傻, 只会把 ; 开头的当成注释.
以下的注释类型都不会被统计到, 结果不太准确.
- 富注释 (comment ...)
- 文档字符串 docstring
- 行间注释
要准确统计到这些内容, 需要我们的代码统计工具能够解析clojure的代码, 也就是需要个语法解析器.
正好rewrite-clj, 日渐成熟, 支撑起了一套代码重构, 分析的工具链.
稍微做了下尝试, 很简单就实现了一个clojure的精准代码行数统计器.
babashka脚本
babashka已经内置了rewrite-clj, 只需引入specter来做数据结构遍历, doric来绘制控制台的表格
(require '[babashka.deps :as deps])
(deps/add-deps '{:deps {com.rpl/specter {:mvn/version "1.1.4"}
doric {:mvn/version "0.9.0"}}})
生成类似下面的统计
|------+-------+---------------+--------+----------|
| Ext | Files | Lines of Code | Spaces | Comments |
|------+-------+---------------+--------+----------|
| .clj | 1 | 357 | 208 | 165 |
| ____ | _____ | _____________ | ______ | ________ |
| SUM: | 1 | 357 | 208 | 165 |
|------+-------+---------------+--------+----------|
代码主体
下面把主要的函数贴出来
(require '[clojure.java.io :as io]
'[clojure.pprint :as pp]
'[clojure.string :as str]
'[rewrite-clj.parser :as parser]
'[rewrite-clj.node :as node]
'[com.rpl.specter :as specter]
'[doric.core :as doric])
(import java.nio.file.FileSystems)
(def clj-matcher
  "A `java.nio.file.PathMatcher` obtained from the default file system.
  It accepts file names ending in `.clj`, `.cljs` or `.cljc`."
  (-> (FileSystems/getDefault)
      (.getPathMatcher "glob:*.{clj,cljs,cljc}")))
(defn find-all-clojure-files
  "Walks `dir` recursively and returns a lazy seq of the regular files
  whose file name is accepted by `clj-matcher`."
  [dir]
  (let [clojure-source? (fn [f]
                          (and (.isFile f) ;; skip directories
                               (.matches clj-matcher
                                         (.getFileName (.toPath f)))))]
    (filter clojure-source? (file-seq (io/file dir)))))
(defn doc-strings
  "Returns the docstring of `node` when `node` is a `def`, `defn` or `ns`
  list form, as the string node's `:lines` value (one element per line of
  the string). Returns nil when `node` is not such a form or has no
  docstring.

  The docstring is looked up as the third meaningful child (after the head
  symbol and the name). Whitespace, newline and comment nodes are ignored,
  so the result does not depend on how the form is laid out."
  [node]
  (when (= (:tag node) :list)
    (let [forms (remove node/whitespace-or-comment? (:children node))
          head  (:value (first forms))]
      (when (and (#{'def 'defn 'ns} head)
                 ;; `(def x "text")` binds a string VALUE; a def only has a
                 ;; docstring when another form follows the string.
                 (or (not= head 'def)
                     (> (count forms) 3)))
        ;; Only string nodes carry `:lines`; any other node yields nil here.
        (:lines (nth forms 2 nil))))))
(defn src-lins
  "Returns the total number of lines in `file` (anything accepted by
  `clojure.java.io/reader`). The reader is closed before returning."
  [file]
  (with-open [rdr (io/reader file)]
    ;; `count` realizes the lazy line seq while the reader is still open.
    (count (line-seq rdr))))
(defn src-space-lines
  "Returns the number of blank lines in `file` (empty or whitespace only,
  per `clojure.string/blank?`). The reader is closed before returning."
  [file]
  (with-open [rdr (io/reader file)]
    ;; `count` realizes the lazy line seq while the reader is still open.
    (count (filter str/blank? (line-seq rdr)))))
(defn src-comment-lines
  "Returns the number of lines in `file` that contain a `;` anywhere.
  Both comment-only lines and lines with a trailing comment are counted.
  This is a textual check, so a `;` inside a string literal is counted as
  well. The reader is closed before returning."
  [file]
  (with-open [rdr (io/reader file)]
    ;; `count` realizes the lazy line seq while the reader is still open.
    (count (filter #(str/includes? % ";") (line-seq rdr)))))
(defn src-inline-comment-lines
  "Returns the number of lines in `file` where code and a comment share the
  same line, for example:

    (+ 1 1) ;; one plus one is two

  A line qualifies when it contains a `;` and the text before the first
  `;` is not blank. Comment-only lines are therefore not counted. This is
  a textual check, so a `;` inside a string literal also qualifies. The
  reader is closed before returning."
  [file]
  (letfn [(inline-comment? [line]
            (when-let [idx (str/index-of line ";")]
              (not (str/blank? (subs line 0 idx)))))]
    (with-open [rdr (io/reader file)]
      ;; `count` realizes the lazy line seq while the reader is still open.
      (count (filter inline-comment? (line-seq rdr))))))
(defn src-doc-string-lines
  "Returns the total number of docstring lines in `file`: the sum, over all
  top-level forms, of the number of lines `doc-strings` returns for each.
  Forms without a docstring contribute nothing."
  [file]
  (->> file
       slurp
       parser/parse-string-all
       :children
       ;; `doc-strings` yields a falsey value for nodes without a docstring;
       ;; skip those instead of counting the node itself.
       (keep #(when-let [lines (doc-strings %)]
                (count lines)))
       (reduce + 0)))
(defn rich-comment-node?
  "Tells whether `node` is a `(comment ...)` form: a `:list` node whose
  first child has the symbol `comment` as its `:value`."
  [node]
  (if (= :list (node/tag node))
    (let [head (-> node :children first :value)]
      ;; No head value at all yields nil, any other head yields a boolean.
      (when (some? head)
        (= head 'comment)))
    false))
(defn unevaled-node?
  "Tells whether `node` is tagged `:uneval`, i.e. a `#_form` node."
  [node]
  (-> node
      node/tag
      (= :uneval)))
(defn sexpr-to-str
  "Pretty-prints the expression `s` with `clojure.pprint/pprint` and
  returns the printed text as a string."
  [s]
  (let [buffer (java.io.StringWriter.)]
    (pp/pprint s buffer)
    (str buffer)))
(defn unevaled-lines
  "Returns the number of source lines occupied by the top-level `#_form`
  nodes of `file`. Each node is rendered back to its source text with
  `node/string` and its lines are counted; the per-node counts are summed.
  Returns 0 when the file has no top-level `#_` forms.

  Blank lines and `;` comment lines inside such a form are included in the
  count."
  [file]
  (->> file
       slurp
       parser/parse-string-all
       :children
       (filter unevaled-node?)
       ;; Count per node: `node/string` works on one node, not on a seq.
       (map (comp count str/split-lines node/string))
       (reduce + 0)))
(defn rich-comment-lines
  "Returns the number of source lines occupied by the top-level
  `(comment ...)` forms of `file`. Each form is rendered back to its
  source text with `node/string` and its lines are counted; the per-form
  counts are summed. Returns 0 when the file has no top-level
  `(comment ...)` forms.

  Blank lines and `;` comment lines inside such a form are included in the
  count."
  [file]
  (->> file
       slurp
       parser/parse-string-all
       :children
       (filter rich-comment-node?)
       ;; Use the original source text; pretty-printing the s-expressions
       ;; reflows them and reports one line even for an empty selection.
       (map (comp count str/split-lines node/string))
       (reduce + 0)))
(defn metrics
  "Collects all line statistics for one source `file` (a `java.io.File`).

  Returns a map with:
    :ext      - the file extension including the dot, \"\" when there is none
    :path     - the file path as given by `.getPath`
    :files    - always 1, so rows can be summed into a file count
    :lines    - total lines minus blank, `;` comment, docstring,
                `(comment ...)` and `#_` lines
    :spaces   - blank lines plus `(comment ...)` lines
    :comments - `;` comment lines plus docstring lines"
  [file]
  (let [total-counts        (src-lins file)
        spaces-counts       (src-space-lines file)
        comments-counts     (src-comment-lines file)
        doc-strings-count   (src-doc-string-lines file)
        rich-comments-count (rich-comment-lines file)
        unevaled-count      (unevaled-lines file)
        file-name           (.getName file)
        dot                 (.lastIndexOf ^String file-name ".")]
    {:ext      (if (neg? dot) "" (subs file-name dot))
     :path     (.getPath file)
     ;; One row describes one file; the report sums this column.
     :files    1
     :lines    (- total-counts
                  spaces-counts
                  comments-counts
                  doc-strings-count
                  rich-comments-count
                  unevaled-count)
     ;; NOTE(review): `(comment ...)` lines are reported under :spaces and
     ;; `#_` lines under no bucket at all - confirm this is intended.
     :spaces   (+ rich-comments-count spaces-counts)
     :comments (+ comments-counts
                  doc-strings-count)}))
;; Generating ASCII report, 以下用于生成ascii报告
(def columns
  "Column specs for the report table, in display order. Each spec names
  the row key it shows (`:name`), its alignment, and optionally a custom
  header (`:title`)."
  [{:align :left,  :name :ext}
   {:align :left,  :name :path,  :title "File"}
   {:align :right, :name :files}
   {:align :right, :name :lines, :title "Lines of Code"}
   {:align :right, :name :spaces}
   {:align :right, :name :comments}])
(defn ascii-table
  "Renders `rows` with `doric/table`, showing only those entries of
  `columns` whose `:name` is a key of the first row."
  [rows]
  (let [present? (set (keys (first rows)))
        visible  (filter (comp present? :name) columns)]
    (doric/table visible rows)))
(defn dash-row
  "Builds a separator row for `rows`: a map with the keys of the first row,
  each mapped to a run of underscores. The run is as wide as the widest of
  the cell values in that column, the column's name, its `:title`, and a
  minimum of 4."
  [rows]
  (letfn [(spec-for [k]
            (first (filter #(= (:name %) k) columns)))
          (width [k]
            (let [col (spec-for k)]
              (apply max
                     4
                     (count (name (:name col)))
                     (count (get col :title ""))
                     (map #(count (str (get % k))) rows))))]
    (into {}
          (map (fn [k] [k (apply str (repeat (width k) "_"))]))
          (keys (first rows)))))
(defn map-vals
  "Returns a new map with the keys of `m`, where every value `v` is
  replaced by `(f v)`."
  [f m]
  (persistent!
   (reduce-kv (fn [acc k v]
                (assoc! acc k (f v)))
              (transient {})
              m)))
(defn table-by-ext
  "Renders the per-extension report for the file metrics `fms`: one row per
  `:ext` with the numeric columns summed and `:files` set to the number of
  files in the group, sorted by `:lines` descending, then a dash row and a
  \"SUM:\" totals row."
  [fms]
  (let [summarize (fn [ms]
                    (-> (apply merge-with + (map #(dissoc % :ext :path) ms))
                        (assoc :files (count ms))))
        by-ext    (for [[ext summary] (map-vals summarize (group-by :ext fms))]
                    (assoc summary :ext ext))
        totals    (-> (apply merge-with + (map #(dissoc % :ext) by-ext))
                      (assoc :ext "SUM:"))
        ranked    (sort-by #(get % :lines -1) > by-ext)]
    (ascii-table (concat ranked
                         [(dash-row by-ext)]
                         [totals]))))
(defn table-by-file
  "Renders the per-file report for the file metrics `fms`: one row per
  file sorted by `:lines` descending, then a dash row and a \"SUM:\" row
  holding the column-wise sums of everything except `:ext` and `:path`."
  [fms]
  (let [numeric (map #(dissoc % :ext :path) fms)
        totals  (-> (apply merge-with + numeric)
                    (assoc :path "SUM:"))
        ranked  (sort-by #(get % :lines -1) > fms)]
    (ascii-table (concat ranked
                         [(dash-row fms)]
                         [totals]))))
(defn print-report
  "Prints a report for the metrics maps `ms`.

  Entries carrying an `:error` are listed as reader errors; the remaining
  entries are rendered as a table. `opts` may contain:
    :info    - function used for normal output (defaults to `println`)
    :warn    - function used for error output (defaults to `println`)
    :by-file - when truthy, print one row per file instead of per extension"
  [ms & [opts]]
  (let [info   (get opts :info println)
        warn   (get opts :warn println)
        failed (for [m ms
                     :when (:error m)]
                 ;; Flatten the exception: its ex-data, the entry itself,
                 ;; and the message in place of the exception object.
                 (merge (ex-data (:error m))
                        m
                        {:error (.getMessage (:error m))}))
        ok     (remove :error ms)]
    (info "Found" (count ms) "source files.")
    (when (seq failed)
      (warn "Encountered" (count failed) "reader errors:")
      (run! #(warn (pr-str %)) failed))
    (when (seq ok)
      (info "")
      (info (if (:by-file opts)
              (table-by-file ok)
              (table-by-ext ok))))))