电子书解析

构造函数

Book 对象的创建分为两种场景:第一种是直接从上传的电子书文件中解析出 Book 对象,第二种是从 data 对象(例如表单数据)中生成 Book 对象:

constructor(file, data) {
    if (file) {
      this.createBookFromFile(file)
    } else if (data) {
      this.createBookFromData(data)
    }
}
1
2
3
4
5
6
7

从文件创建 Book 对象

从文件读取电子书后,初始化 Book 对象:

createBookFromFile(file) {
    const {
      destination: des, // 文件本地存储目录
      filename, // 文件名称
      mimetype = MIME_TYPE_EPUB // 文件资源类型
    } = file
    const suffix = mimetype === MIME_TYPE_EPUB ? '.epub' : ''
    const oldBookPath = `${des}/${filename}`
    const bookPath = `${des}/${filename}${suffix}`
    const url = `${UPLOAD_URL}/book/${filename}${suffix}`
    const unzipPath = `${UPLOAD_PATH}/unzip/${filename}`
    const unzipUrl = `${UPLOAD_URL}/unzip/${filename}`
    if (!fs.existsSync(unzipPath)) {
      fs.mkdirSync(unzipPath, { recursive: true }) // 创建电子书解压后的目录
    }
    if (fs.existsSync(oldBookPath) && !fs.existsSync(bookPath)) {
      fs.renameSync(oldBookPath, bookPath) // 重命名文件
    }
    this.fileName = filename // 文件名
    this.path = `/book/${filename}${suffix}` // epub文件路径
    this.filePath = this.path // epub文件路径
    this.url = url // epub文件url
    this.title = '' // 标题
    this.author = '' // 作者
    this.publisher = '' // 出版社
    this.contents = [] // 目录
    this.cover = '' // 封面图片URL
    this.category = -1 // 分类ID
    this.categoryText = '' // 分类名称
    this.language = '' // 语种
    this.unzipPath = `/unzip/${filename}` // 解压后的电子书目录
    this.unzipUrl = unzipUrl // 解压后的电子书链接
    this.originalName = file.originalname
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34

从数据创建 Book 对象

从表单对象中创建 Book 对象:

createBookFromData(data) {
    this.fileName = data.fileName
    this.cover = data.coverPath
    this.title = data.title
    this.author = data.author
    this.publisher = data.publisher
    this.bookId = data.fileName
    this.language = data.language
    this.rootFile = data.rootFile
    this.originalName = data.originalName
    this.path = data.path || data.filePath
    this.filePath = data.path || data.filePath
    this.unzipPath = data.unzipPath
    this.coverPath = data.coverPath
    this.createUser = data.username
    this.createDt = new Date().getTime()
    this.updateDt = new Date().getTime()
    this.updateType = data.updateType === 0 ? data.updateType : UPDATE_TYPE_FROM_WEB
    this.contents = data.contents
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20

电子书解析

初始化后,可以调用 Book 实例的 parse 方法解析电子书,这里我们使用了 epub 库,我们直接将 epub 库源码集成到项目中:

epub 库集成

epub 库源码:https://github.com/julien-c/epub,我们直接将 epub.js 拷贝到 /utils/epub.js

epub 库获取图片逻辑修改

修改获取图片的源码:

getImage(id, callback) {
    if (this.manifest[id]) {
      if ((this.manifest[id]['media-type'] || '').toLowerCase().trim().substr(0, 6) != 'image/') {
        return callback(new Error('Invalid mime type for image'))
      }
      this.getFile(id, callback)
    } else {
      const coverId = Object.keys(this.manifest).find(key => (
        this.manifest[key].properties === 'cover-image'))
      if (coverId) {
        this.getFile(coverId, callback)
      } else {
        callback(new Error('File not found'))
      }
    }
};
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16

使用 epub 库解析电子书

parse() {
    return new Promise((resolve, reject) => {
      const bookPath = `${UPLOAD_PATH}${this.path}`
      if (!this.path || !fs.existsSync(bookPath)) {
        reject(new Error('电子书路径不存在'))
      }
      const epub = new Epub(bookPath)
      epub.on('error', err => {
        reject(err)
      })
      epub.on('end', err => {
        if (err) {
          reject(err)
        } else {
          let {
            title,
            language,
            creator,
            creatorFileAs,
            publisher,
            cover
          } = epub.metadata
          // title = ''
          if (!title) {
            reject(new Error('图书标题为空'))
          } else {
            this.title = title
            this.language = language || 'en'
            this.author = creator || creatorFileAs || 'unknown'
            this.publisher = publisher || 'unknown'
            this.rootFile = epub.rootFile
            const handleGetImage = (error, imgBuffer, mimeType) => {
              if (error) {
                reject(error)
              } else {
                const suffix = mimeType.split('/')[1]
                const coverPath = `${UPLOAD_PATH}/img/${this.fileName}.${suffix}`
                const coverUrl = `${UPLOAD_URL}/img/${this.fileName}.${suffix}`
                fs.writeFileSync(coverPath, imgBuffer, 'binary')
                this.coverPath = `/img/${this.fileName}.${suffix}`
                this.cover = coverUrl
                resolve(this)
              }
            }
            try {
              this.unzip() // 解压电子书
              this.parseContents(epub)
                .then(({ chapters, chapterTree }) => {
                  this.contents = chapters
                  this.contentsTree = chapterTree
                  epub.getImage(cover, handleGetImage) // 获取封面图片
                })
                .catch(err => reject(err)) // 解析目录
            } catch (e) {
              reject(e)
            }
          }
        }
      })
      epub.parse()
      this.epub = epub
    })
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63

电子书目录解析

电子书解析过程中我们需要自定义电子书目录解析,第一步需要解压电子书:

unzip() {
    const AdmZip = require('adm-zip')
    const zip = new AdmZip(Book.genPath(this.path)) // 解析文件路径
    zip.extractAllTo(
      /*target path*/Book.genPath(this.unzipPath),
      /*overwrite*/true
    )
}
1
2
3
4
5
6
7
8

genPath 是 Book 的一个静态方法,我们可以使用 ES6 的 static 关键字来实现:

static genPath(path) {
    if (path.startsWith('/')) {
      return `${UPLOAD_PATH}${path}`
    } else {
      return `${UPLOAD_PATH}/${path}`
    }
}
1
2
3
4
5
6
7

电子书目录解析算法:

parseContents(epub) {
    function getNcxFilePath() {
      const manifest = epub && epub.manifest
      const spine = epub && epub.spine
      const ncx = manifest && manifest.ncx
      const toc = spine && spine.toc
      return (ncx && ncx.href) || (toc && toc.href)
    }

    /**
     * flatten方法,将目录转为一维数组
     *
     * @param array
     * @returns {*[]}
     */
    function flatten(array) {
      return [].concat(...array.map(item => {
        if (item.navPoint && item.navPoint.length) {
          return [].concat(item, ...flatten(item.navPoint))
        } else if (item.navPoint) {
          return [].concat(item, item.navPoint)
        } else {
          return item
        }
      }))
    }

    /**
     * 查询当前目录的父级目录及规定层次
     *
     * @param array
     * @param level
     * @param pid
     */
    function findParent(array, level = 0, pid = '') {
      return array.map(item => {
        item.level = level
        item.pid = pid
        if (item.navPoint && item.navPoint.length) {
          item.navPoint = findParent(item.navPoint, level + 1, item['$'].id)
        } else if (item.navPoint) {
          item.navPoint.level = level + 1
          item.navPoint.pid = item['$'].id
        }
        return item
      })
    }

    if (!this.rootFile) {
      throw new Error('目录解析失败')
    } else {
      const fileName = this.fileName
      return new Promise((resolve, reject) => {
        const ncxFilePath = Book.genPath(`${this.unzipPath}/${getNcxFilePath()}`) // 获取ncx文件路径
        const xml = fs.readFileSync(ncxFilePath, 'utf-8') // 读取ncx文件
        // 将ncx文件从xml转为json
        xml2js(xml, {
          explicitArray: false, // 设置为false时,解析结果不会包裹array
          ignoreAttrs: false  // 解析属性
        }, function(err, json) {
          if (!err) {
            const navMap = json.ncx.navMap // 获取ncx的navMap属性
            if (navMap.navPoint) { // 如果navMap属性存在navPoint属性,则说明目录存在
              navMap.navPoint = findParent(navMap.navPoint)
              const newNavMap = flatten(navMap.navPoint) // 将目录拆分为扁平结构
              const chapters = []
              epub.flow.forEach((chapter, index) => { // 遍历epub解析出来的目录
                // 如果目录大于从ncx解析出来的数量,则直接跳过
                if (index + 1 > newNavMap.length) {
                  return
                }
                const nav = newNavMap[index] // 根据index找到对应的navMap
                chapter.text = `${UPLOAD_URL}/unzip/${fileName}/${chapter.href}` // 生成章节的URL
                // console.log(`${JSON.stringify(navMap)}`)
                if (nav && nav.navLabel) { // 从ncx文件中解析出目录的标题
                  chapter.label = nav.navLabel.text || ''
                } else {
                  chapter.label = ''
                }
                chapter.level = nav.level
                chapter.pid = nav.pid
                chapter.navId = nav['$'].id
                chapter.fileName = fileName
                chapter.order = index + 1
                chapters.push(chapter)
              })
              const chapterTree = []
              chapters.forEach(c => {
                c.children = []
                if (c.pid === '') {
                  chapterTree.push(c)
                } else {
                  const parent = chapters.find(_ => _.navId === c.pid)
                  parent.children.push(c)
                }
              }) // 将目录转化为树状结构
              resolve({ chapters, chapterTree })
            } else {
              reject(new Error('目录解析失败,navMap.navPoint error'))
            }
          } else {
            reject(err)
          }
        })
      })
    }
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107

Book 对象的其他方法

toJson() {
    return {
      path: this.path,
      url: this.url,
      title: this.title,
      language: this.language,
      author: this.author,
      publisher: this.publisher,
      cover: this.cover,
      coverPath: this.coverPath,
      unzipPath: this.unzipPath,
      unzipUrl: this.unzipUrl,
      category: this.category,
      categoryText: this.categoryText,
      contents: this.contents,
      contentsTree: this.contentsTree,
      originalName: this.originalName,
      rootFile: this.rootFile,
      fileName: this.fileName,
      filePath: this.filePath
    }
}

toDb() {
    return {
      fileName: this.fileName,
      cover: this.cover,
      title: this.title,
      author: this.author,
      publisher: this.publisher,
      bookId: this.bookId,
      updateType: this.updateType,
      language: this.language,
      rootFile: this.rootFile,
      originalName: this.originalName,
      filePath: this.path,
      unzipPath: this.unzipPath,
      coverPath: this.coverPath,
      createUser: this.createUser,
      createDt: this.createDt,
      updateDt: this.updateDt,
      category: this.category || 99,
      categoryText: this.categoryText || '自定义'
    }
}

getContents() {
    return this.contents
}

reset() {
    if (this.path && Book.pathExists(this.path)) {
      fs.unlinkSync(Book.genPath(this.path))
    }
    if (this.filePath && Book.pathExists(this.filePath)) {
      fs.unlinkSync(Book.genPath(this.filePath))
    }
    if (this.coverPath && Book.pathExists(this.coverPath)) {
      fs.unlinkSync(Book.genPath(this.coverPath))
    }
    if (this.unzipPath && Book.pathExists(this.unzipPath)) {
      // 注意node低版本将不支持第二个属性
      fs.rmdirSync(Book.genPath(this.unzipPath), { recursive: true })
    }
}
  
static pathExists(path) {
    if (path.startsWith(UPLOAD_PATH)) {
      return fs.existsSync(path)
    } else {
      return fs.existsSync(Book.genPath(path))
    }
}

static genCoverUrl(book) {
    console.log('genCoverUrl', book)
    if (Number(book.updateType) === 0) {
      const { cover } = book
      if (cover) {
        if (cover.startsWith('/')) {
          return `${OLD_UPLOAD_URL}${cover}`
        } else {
          return `${OLD_UPLOAD_URL}/${cover}`
        }
      } else {
        return null
      }
    } else {
      if (book.cover) {
        if (book.cover.startsWith('/')) {
          return `${UPLOAD_URL}${book.cover}`
        } else {
          return `${UPLOAD_URL}/${book.cover}`
        }
      } else {
        return null
      }
    }
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
上次更新: 11/30/2019, 4:53:53 PM