audiobookshelf/server/utils/parsers/parseNameString.js

103 lines
3.2 KiB
JavaScript
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

//
// This takes a string and parsed out first and last names
// accepts comma separated lists e.g. "Jon Smith, Jane Smith" or "Smith, Jon, Smith, Jane"
// can be separated by "&" e.g. "Jon Smith & Jane Smith" or "Smith, Jon & Smith, Jane"
//
const parseFullName = require('./parseFullName')
function parseName(name) {
var parts = parseFullName(name)
var firstName = parts.first
if (firstName && parts.middle) firstName += ' ' + parts.middle
return {
first_name: firstName,
last_name: parts.last
}
}
// Check if this name segment is of the format "Last, First" or "First Last"
// return true is "Last, First"
function checkIsALastName(name) {
if (!name.includes(' ')) return true // No spaces must be a Last name
var parsed = parseFullName(name)
if (!parsed.first) return true // had spaces but not a first name i.e. "von Mises", must be last name only
return false
}
// Handle name already in First Last format and return Last, First
module.exports.nameToLastFirst = (firstLast) => {
var nameObj = parseName(firstLast)
if (!nameObj.last_name) return nameObj.first_name
else if (!nameObj.first_name) return nameObj.last_name
return `${nameObj.last_name}, ${nameObj.first_name}`
}
// Handle any name string
module.exports.parse = (nameString) => {
if (!nameString) return null
var splitNames = []
// Example &LF: Friedman, Milton & Friedman, Rose
if (nameString.includes('&')) {
nameString.split('&').forEach((asa) => (splitNames = splitNames.concat(asa.split(','))))
} else if (nameString.includes(' and ')) {
nameString.split(' and ').forEach((asa) => (splitNames = splitNames.concat(asa.split(','))))
} else if (nameString.includes(';')) {
nameString.split(';').forEach((asa) => (splitNames = splitNames.concat(asa.split(','))))
} else {
splitNames = nameString.split(',')
}
if (splitNames.length) splitNames = splitNames.map((a) => a.trim())
// If names are in ChineseJapanese and Korean languages, return as is.
if (/[\u4e00-\u9fff\u3040-\u30ff\u31f0-\u31ff]/.test(splitNames[0])) {
return {
names: splitNames
}
}
var names = []
// 1 name FIRST LAST
if (splitNames.length === 1) {
names.push(parseName(nameString))
} else {
var firstChunkIsALastName = checkIsALastName(splitNames[0])
var isEvenNum = splitNames.length % 2 === 0
if (!isEvenNum && firstChunkIsALastName) {
// console.error('Multi-name LAST,FIRST entry has a straggler (could be roman numerals or a suffix), ignore it')
splitNames = splitNames.slice(0, splitNames.length - 1)
}
if (firstChunkIsALastName) {
var num = splitNames.length / 2
for (let i = 0; i < num; i++) {
var last = splitNames.shift()
var first = splitNames.shift()
names.push({
first_name: first,
last_name: last
})
}
} else {
splitNames.forEach((segment) => {
names.push(parseName(segment))
})
}
}
// Filter out names that have no first and last
names = names.filter((n) => n.first_name || n.last_name)
// Set name strings and remove duplicates
const namesArray = [...new Set(names.map((a) => (a.first_name ? `${a.first_name} ${a.last_name}` : a.last_name)))]
return {
names: namesArray // Array of first last
}
}