/*
 * This is borrowed from koodo-reader https://github.com/troyeguo/koodo-reader/tree/master/src
 */

/**
 * Decode a chunk of bytes as UTF-8 text.
 *
 * @param {ArrayBuffer|Uint8Array} buf - Raw bytes; an ArrayBuffer is wrapped
 *   in a Uint8Array view first, anything else is handed to the decoder as-is.
 * @returns {string} The decoded string.
 */
function ab2str(buf) {
  const bytes = buf instanceof ArrayBuffer ? new Uint8Array(buf) : buf;
  return new TextDecoder("utf-8").decode(bytes);
}
// Shared DOMParser instance, created once when the module is evaluated.
const domParser = new DOMParser();
/**
 * Growable byte buffer built from a list of fixed-capacity Fragment chunks.
 *
 * Bytes are appended with write(); when the current fragment refuses a byte a
 * new fragment of the same capacity is started. Note that a capacity of 0
 * makes every fragment refuse every byte, so writes are then dropped.
 */
class Buffer {
  capacity;
  fragment_list;
  imageArray;
  cur_fragment;

  /**
   * @param {number} capacity - Capacity, in bytes, of every fragment.
   */
  constructor(capacity) {
    this.capacity = capacity;
    this.fragment_list = [];
    this.imageArray = [];
    this.cur_fragment = new Fragment(capacity);
    this.fragment_list.push(this.cur_fragment);
  }

  /**
   * Append one byte, starting a new fragment when the current one is full.
   *
   * @param {number} byte - Value to append.
   */
  write(byte) {
    if (this.cur_fragment.write(byte)) {
      return;
    }
    this.cur_fragment = new Fragment(this.capacity);
    this.fragment_list.push(this.cur_fragment);
    this.cur_fragment.write(byte);
  }

  /**
   * Read the byte at a logical index across all fragments.
   *
   * @param {number} idx - Zero-based index into the written bytes.
   * @returns {number|null} The byte, or null when idx is outside the written
   *   range (negative or past the end).
   */
  get(idx) {
    // A negative index used to fall through to the first fragment and come
    // back as undefined; report it like any other out-of-range index.
    if (idx < 0) {
      return null;
    }
    let remaining = idx;
    for (const frag of this.fragment_list) {
      if (remaining < frag.size) {
        return frag.get(remaining);
      }
      remaining -= frag.size;
    }
    return null;
  }

  /**
   * @returns {number} Total number of bytes written so far.
   */
  size() {
    return this.fragment_list.reduce((total, frag) => total + frag.size, 0);
  }

  /**
   * Copy all written bytes into one contiguous array.
   *
   * @returns {Uint8Array} A new array of exactly size() bytes.
   */
  shrink() {
    const total_buffer = new Uint8Array(this.size());
    let offset = 0;
    for (const frag of this.fragment_list) {
      // subarray() is a view, so the only copy is the set() itself.
      total_buffer.set(frag.buffer.subarray(0, frag.size), offset);
      offset += frag.size;
    }
    return total_buffer;
  }
}
/**
 * Concatenate a list of byte arrays into one Uint8Array.
 *
 * @param {Array<Uint8Array>} buffers - Arrays to join, in order.
 * @returns {Uint8Array} A new array holding every input back to back; empty
 *   when `buffers` is empty.
 */
function copagesne_uint8array(buffers) {
  let total_size = 0;
  for (const chunk of buffers) {
    total_size += chunk.length;
  }

  const total_buffer = new Uint8Array(total_size);
  let offset = 0;
  for (const chunk of buffers) {
    total_buffer.set(chunk, offset);
    offset += chunk.length;
  }
  return total_buffer;
}
/**
 * Fixed-capacity chunk of bytes that tracks how many slots have been written.
 */
class Fragment {
  buffer;
  capacity;
  size;

  /**
   * @param {number} capacity - Number of bytes this fragment can hold.
   */
  constructor(capacity) {
    this.buffer = new Uint8Array(capacity);
    this.capacity = capacity;
    this.size = 0;
  }

  /**
   * Append one byte if there is room.
   *
   * @param {number} byte - Value to store.
   * @returns {boolean} true when stored, false when the fragment is full.
   */
  write(byte) {
    if (this.size >= this.capacity) {
      return false;
    }
    this.buffer[this.size] = byte;
    this.size += 1;
    return true;
  }

  /**
   * @returns {boolean} true once `size` has reached `capacity`.
   */
  full() {
    return this.size === this.capacity;
  }

  /**
   * Read a slot of the backing array; the index is not checked against
   * `size`.
   *
   * @param {number} idx - Index into the backing array.
   * @returns {number|undefined} The stored value, or undefined when idx is
   *   outside the backing array.
   */
  get(idx) {
    return this.buffer[idx];
  }
}
/**
 * Expand a PalmDOC-style LZ77 compressed record.
 *
 * Each input byte selects one of these actions:
 *   0x00         copied through unchanged
 *   0x01..0x08   that many following bytes are copied through
 *   0x09..0x7f   copied through unchanged
 *   0x80..0xbf   with the next byte forms a back-reference: an 11-bit
 *                distance and a 3-bit length (stored length + 3)
 *   0xc0..0xff   a space followed by the byte with its top bit cleared
 *
 * Truncated input is handled leniently: a literal run that overruns the
 * record copies only the bytes that exist, and a back-reference missing its
 * second byte ends decoding.
 *
 * @param {Uint8Array} data - Compressed bytes.
 * @returns {Buffer} Buffer holding the expanded bytes; call shrink() on it to
 *   get a contiguous Uint8Array.
 */
function uncompression_lz77(data) {
  const length = data.length;
  const buffer = new Buffer(length);
  let offset = 0; // Current offset into data

  while (offset < length) {
    const char = data[offset];
    offset += 1;

    if (char === 0 || (char > 8 && char <= 0x7f)) {
      buffer.write(char);
    } else if (char <= 8) {
      // Clamp so a truncated run does not append bytes that were never there.
      const run_end = Math.min(offset + char, length);
      for (let i = offset; i < run_end; i++) {
        buffer.write(data[i]);
      }
      offset = run_end;
    } else if (char <= 0xbf) {
      if (offset >= length) {
        // Second byte of the pair is missing; nothing sensible to decode.
        break;
      }
      const next = data[offset];
      offset += 1;
      const distance = (((char << 8) | next) >> 3) & 0x7ff;
      const lz_length = (next & 0x7) + 3;

      // Copy byte by byte: the source range may overlap bytes written by
      // this very copy. An out-of-range source yields no value from get(),
      // which write() stores as whatever the byte store coerces it to.
      let source = buffer.size() - distance;
      for (let i = 0; i < lz_length; i++) {
        buffer.write(buffer.get(source));
        source += 1;
      }
    } else {
      buffer.write(32);
      buffer.write(char ^ 0x80);
    }
  }
  return buffer;
}
/**
 * Reader for a MOBI / PalmDOC book held in memory.
 *
 * `load()` parses the database header, the record list and record 0;
 * `read_text()` / `read_image()` then extract content, and `render()` ties it
 * together into a parsed HTML document or a plain-text dump.
 */
class MobiFile {
  view;
  buffer;
  offset;
  header;
  palm_header;
  mobi_header;
  reclist;

  /**
   * @param {ArrayBuffer} data - Whole file contents.
   */
  constructor(data) {
    this.view = new DataView(data);
    this.buffer = this.view.buffer;
    this.offset = 0; // read cursor used by the get*/skip helpers
    this.header = null;
  }

  parse() {}

  /** Read an unsigned 8-bit value at the cursor and advance by 1. */
  getUint8() {
    const v = this.view.getUint8(this.offset);
    this.offset += 1;
    return v;
  }

  /** Read a big-endian unsigned 16-bit value at the cursor and advance by 2. */
  getUint16() {
    const v = this.view.getUint16(this.offset);
    this.offset += 2;
    return v;
  }

  /** Read a big-endian unsigned 32-bit value at the cursor and advance by 4. */
  getUint32() {
    const v = this.view.getUint32(this.offset);
    this.offset += 4;
    return v;
  }

  /**
   * Decode `size` bytes at the cursor with ab2str and advance past them.
   *
   * @param {number} size - Number of bytes to read.
   * @returns {string}
   */
  getStr(size) {
    const v = ab2str(this.buffer.slice(this.offset, this.offset + size));
    this.offset += size;
    return v;
  }

  /** Advance the cursor by `size` bytes without reading. */
  skip(size) {
    this.offset += size;
  }

  /** Move the cursor to the absolute offset `_of`. */
  setoffset(_of) {
    this.offset = _of;
  }

  /**
   * Work out how many trailing bytes of a text record are not text.
   *
   * For every set bit 15..1 of `flags` one trailing entry is measured with
   * buffer_get_varlen (its size includes its own length bytes) and skipped.
   * If bit 0 is set, the low two bits of the byte then at the end, plus one,
   * are added.
   *
   * @param {Uint8Array} data - Raw record bytes.
   * @param {number} flags - Extra-data flags from the MOBI header.
   * @returns {number} Number of bytes to drop from the end of `data`.
   */
  get_record_extrasize(data, flags) {
    let pos = data.length - 1;
    let extra = 0;
    for (let bit = 15; bit > 0; bit--) {
      if (flags & (1 << bit)) {
        const [size, l, next_pos] = this.buffer_get_varlen(data, pos);
        // next_pos already skipped the l size bytes; skip the rest of the entry.
        pos = next_pos - (size - l);
        extra += size;
      }
    }
    if (flags & 1) {
      extra += (data[pos] & 0x3) + 1;
    }
    return extra;
  }

  /**
   * Read a backward-encoded variable-length integer ending at `pos`.
   *
   * Bytes are consumed from `pos` toward the start of `data`, 7 bits each,
   * least-significant group first, until a byte with the high bit set or
   * four bytes have been read.
   *
   * @param {Uint8Array} data - Record bytes (should be a Uint8Array).
   * @param {number} pos - Index of the last byte of the encoded value.
   * @returns {[number, number, number]} [value, bytes consumed, new position].
   */
  buffer_get_varlen(data, pos) {
    const mask = 0x7f;
    const stop_flag = 0x80;
    let size = 0;
    let byte_count = 0;
    let shift = 0;
    for (;;) {
      const byte = data[pos];
      size |= (byte & mask) << shift;
      shift += 7;
      byte_count += 1;
      pos -= 1;

      if (byte_count >= 4 || (byte & stop_flag) > 0) {
        break;
      }
    }
    return [size, byte_count, pos];
  }

  /**
   * Byte range of a record inside the file.
   *
   * The end is the next record's offset or, for the last record in the list,
   * the end of the file.
   *
   * @param {number} idx - Index into `reclist`.
   * @returns {[number, number]} [begin, end) byte offsets.
   * @throws {RangeError} When `idx` is not a record of this file.
   */
  get_record_range(idx) {
    const record = this.reclist[idx];
    if (!record) {
      throw new RangeError(`MOBI record ${idx} does not exist`);
    }
    const next = this.reclist[idx + 1];
    return [record.offset, next ? next.offset : this.buffer.byteLength];
  }

  /**
   * Read out the text content.
   *
   * Text records are 1..record_count; the count is clamped to the records
   * actually present. Bytes are decoded as Windows-1252 when the MOBI header
   * declares code page 1252 and as UTF-8 otherwise.
   *
   * @returns {string} The book's markup.
   */
  read_text() {
    const text_end = Math.min(
      this.palm_header.record_count,
      this.reclist.length - 1
    );
    const buffers = [];
    for (let i = 1; i <= text_end; i++) {
      buffers.push(this.read_text_record(i));
    }
    const all = copagesne_uint8array(buffers);
    const label =
      this.mobi_header.text_encoding === 1252 ? "windows-1252" : "utf-8";
    return new TextDecoder(label).decode(all);
  }

  /**
   * Read one text record, strip its trailing extra data and decompress it
   * when the Palm header says compression type 2.
   *
   * @param {number} i - Record index.
   * @returns {Uint8Array} Text bytes of the record.
   */
  read_text_record(i) {
    const flags = this.mobi_header.extra_flags;
    const [begin, end] = this.get_record_range(i);

    const raw = new Uint8Array(this.buffer.slice(begin, end));
    const ex = this.get_record_extrasize(raw, flags);

    // Never let a bogus extra size move the end before the start.
    const data = raw.subarray(0, Math.max(0, raw.length - ex));
    if (this.palm_header.compression === 2) {
      return uncompression_lz77(data).shrink();
    }
    return data;
  }

  /**
   * Read an image out of the buffer.
   *
   * @param {number} idx - Zero-based image number, relative to
   *   `mobi_header.first_image_idx`.
   * @returns {Blob} The raw record bytes.
   */
  read_image(idx) {
    const first_image_idx = this.mobi_header.first_image_idx;
    const [begin, end] = this.get_record_range(first_image_idx + idx);
    return new Blob([this.buffer.slice(begin, end)]);
  }

  /** Parse the database header, the record list and record 0, in that order. */
  load() {
    this.header = this.load_pdbheader();
    this.reclist = this.load_reclist();
    this.load_record0();
  }

  /**
   * Read the fixed-size database header at the cursor.
   *
   * @returns {object} The header fields; `record_num` is the record count.
   */
  load_pdbheader() {
    const header = {};
    header.name = this.getStr(32);
    header.attr = this.getUint16();
    header.version = this.getUint16();
    header.ctime = this.getUint32();
    header.mtime = this.getUint32();
    header.btime = this.getUint32();
    header.mod_num = this.getUint32();
    header.appinfo_offset = this.getUint32();
    header.sortinfo_offset = this.getUint32();
    header.type = this.getStr(4);
    header.creator = this.getStr(4);
    header.uid = this.getUint32();
    header.next_rec = this.getUint32();
    header.record_num = this.getUint16();
    return header;
  }

  /**
   * Read `header.record_num` record entries (two 32-bit words each).
   *
   * @returns {Array<{offset: number, attr: number}>}
   */
  load_reclist() {
    const reclist = [];
    for (let i = 0; i < this.header.record_num; i++) {
      const record = {};
      record.offset = this.getUint32();
      // TODO(zz) change
      record.attr = this.getUint32();
      reclist.push(record);
    }
    return reclist;
  }

  /** Parse record 0: the Palm header followed by the MOBI header. */
  load_record0() {
    this.palm_header = this.load_record0_header();
    this.mobi_header = this.load_mobi_header();
  }

  /**
   * Read the 16-byte header at the start of record 0.
   *
   * @returns {object} compression, text_length, record_count, record_size
   *   and encryption_type.
   */
  load_record0_header() {
    const p_header = {};
    const first_record = this.reclist[0];
    this.setoffset(first_record.offset);

    p_header.compression = this.getUint16();
    this.skip(2);
    p_header.text_length = this.getUint32();
    p_header.record_count = this.getUint16();
    p_header.record_size = this.getUint16();
    p_header.encryption_type = this.getUint16();
    this.skip(2);

    return p_header;
  }

  /**
   * Read the MOBI header that starts at the cursor and leave the cursor at
   * `header start + header_length`.
   *
   * `extra_flags` lives 226 bytes into the header, so it is only read when
   * `header_length` covers it (>= 228); otherwise it is reported as 0.
   *
   * @returns {object} The parsed header fields.
   */
  load_mobi_header() {
    const mobi_header = {};

    const start_offset = this.offset;

    mobi_header.identifier = this.getUint32();
    mobi_header.header_length = this.getUint32();
    mobi_header.mobi_type = this.getUint32();
    mobi_header.text_encoding = this.getUint32();
    mobi_header.uid = this.getUint32();
    mobi_header.generator_version = this.getUint32();

    this.skip(40);

    mobi_header.first_nonbook_index = this.getUint32();
    mobi_header.full_name_offset = this.getUint32();
    mobi_header.full_name_length = this.getUint32();

    mobi_header.language = this.getUint32();
    mobi_header.input_language = this.getUint32();
    mobi_header.output_language = this.getUint32();
    mobi_header.min_version = this.getUint32();
    mobi_header.first_image_idx = this.getUint32();

    mobi_header.huff_rec_index = this.getUint32();
    mobi_header.huff_rec_count = this.getUint32();
    mobi_header.datp_rec_index = this.getUint32();
    mobi_header.datp_rec_count = this.getUint32();

    mobi_header.exth_flags = this.getUint32();

    this.skip(36);

    mobi_header.drm_offset = this.getUint32();
    mobi_header.drm_count = this.getUint32();
    mobi_header.drm_size = this.getUint32();
    mobi_header.drm_flags = this.getUint32();

    if (mobi_header.header_length >= 228) {
      this.skip(8);

      // TODO (zz) fdst_index
      this.skip(4);

      this.skip(46);

      mobi_header.extra_flags = this.getUint16();
    } else {
      // Short header: those bytes belong to whatever follows it.
      mobi_header.extra_flags = 0;
    }

    this.setoffset(start_offset + mobi_header.header_length);

    return mobi_header;
  }

  load_exth_header() {
    // TODO
    return {};
  }

  /**
   * Strip markup from an HTML string and return its text.
   *
   * The string is parsed with the shared DOMParser rather than assigned to
   * `innerHTML`, so handlers embedded in book markup are never run.
   *
   * @param {string} s - HTML fragment.
   * @returns {string} Text content of the fragment.
   */
  extractContent(s) {
    const markup = s == null ? "" : String(s);
    const body = domParser.parseFromString(markup, "text/html").body;
    return body.textContent || "";
  }

  /**
   * Load the book and produce its rendered form.
   *
   * @param {boolean} [isElectron=false]
   * @returns {Promise<Element|string>} The parsed document element when
   *   `isElectron` is false or the book has more than 200 images; otherwise
   *   the collected lines joined with "\n \n". Rejects when loading, parsing
   *   or reading an image fails.
   */
  async render(isElectron = false) {
    this.load();
    const content = this.read_text();
    const bookDoc = domParser.parseFromString(content, "text/html")
      .documentElement;
    const lines = Array.from(bookDoc.querySelectorAll("p,b,font,h3,h2,h1"));
    const parseContent = [];
    // NOTE(review): the last matched element is skipped (len - 1), as in the
    // original code; confirm whether that is intentional.
    for (let i = 0, len = lines.length; i < len - 1; i++) {
      const text = lines[i].innerText;
      // Skip empty text and immediate repeats of the previous entry.
      if (text && text !== parseContent[parseContent.length - 1]) {
        parseContent.push(text);
      }
      const imgCount = lines[i].getElementsByTagName("img").length;
      for (let j = 0; j < imgCount; j++) {
        parseContent.push("#image");
      }
    }

    const imgDoms = bookDoc.getElementsByTagName("img");
    parseContent.push("~image");
    for (let i = 0; i < imgDoms.length; i++) {
      const src = await this.render_image(imgDoms, i);
      parseContent.push(src + " " + imgDoms[i].width + " " + imgDoms[i].height);
    }
    if (imgDoms.length > 200 || !isElectron) {
      return bookDoc;
    }
    return parseContent.join("\n \n");
  }

  /**
   * Load the image record referenced by an <img recindex="N"> element, set
   * it as the element's `src` as a data URL and resolve with that URL.
   *
   * @param {ArrayLike<Element>} imgDoms - Collection of <img> elements.
   * @param {number} i - Index into `imgDoms`.
   * @returns {Promise<string>}
   */
  render_image = (imgDoms, i) => {
    return new Promise((resolve, reject) => {
      const imgDom = imgDoms[i];
      // recindex is 1-based; read_image expects a 0-based image number.
      const idx = +imgDom.getAttribute("recindex");
      const blob = this.read_image(idx - 1);
      const imgReader = new FileReader();
      imgReader.onload = (e) => {
        imgDom.src = e.target?.result;
        resolve(e.target?.result);
      };
      imgReader.onerror = function (err) {
        reject(err);
      };
      imgReader.readAsDataURL(blob);
    });
  };
}
export default MobiFile;