Angry1947 2020-11-09 17:02:01
php,解析marc数据
/**
* 总的来说,这个功能很简单,就是读取文件,循环拼接字符串,存起来。或者写入其他什么地方
*/
/** 注: 启示帖子:https://blog.csdn.net/chaishen10000/article/details/79245055 */
/**
* 虽然写的很垃圾,但是确实代码原创;有具体疑问可以联系我
* 因为是闲来无聊,从项目里摘出来的,删减了一些业务逻辑,需要的自行增加
* 重要的地方都用注释标记了,注意重点查看
* 框架选用 thinkphp
* @赵
* @email lx5231616@163.com
*/
// Data import: parse a MARC (ISO 2709) file and bulk-insert its records.
/**
 * Reads "./test.IOS" (one ISO 2709 record per line, GBK encoded), extracts the
 * bibliographic columns from every record and inserts them into `tbookinfo`,
 * issuing one multi-row INSERT per 5000 input lines.
 *
 * Lines that are blank or do not start with a valid 24-byte leader are skipped.
 * The value tuples are generated from the same column list as the INSERT
 * header, so the number of values always matches the number of columns.
 * (Values that were concatenated before but had no matching column — cszl,
 * gnsy, zgzt, gcd, the session gid, ssyz, flmc — are therefore no longer sent;
 * add the column name to $columns and a key to the parsed row to store them.)
 *
 * Terminates the script with "Unable to open file!" when the file cannot be read.
 *
 * @return void
 */
function collectionImport()
{
    ini_set("memory_limit", "1024M");
    set_time_limit(0);
    $file_path = "./test.IOS";
    // Set the response charset to avoid garbled output; skipped when output has
    // already started, where header() would only raise a warning.
    if (!headers_sent()) {
        header("content-type:text/html;charset=GBK");
    }

    // Read the whole file at once; unlike fread($f, filesize()) this also
    // copes with an empty file and leaves no handle open.
    $content = is_readable($file_path) ? file_get_contents($file_path) : false;
    if ($content === false) {
        die("Unable to open file!");
    }

    $columns = array(
        'sjbm', 'zzxx', 'bzISBN', 'sjjg', 'cbdd', 'cbsm', 'cbrq', 'ztfl',
        'sjym', 'sjkb', 'nrjj', 'sjbh', 'rksj', 'zch', 'ssh',
    );
    $sqlHead = "insert into tbookinfo (`" . implode("`,`", $columns) . "`) values";

    // Records are separated by "\n"; process them in batches of 5000 lines.
    foreach (array_chunk(explode("\n", $content), 5000) as $lines) {
        $tuples = array();
        foreach ($lines as $line) {
            $row = collectionImportParseRecord($line);
            if ($row === null) {
                continue;
            }
            $values = array();
            foreach ($columns as $column) {
                $values[] = "'" . collectionImportSqlValue($row[$column]) . "'";
            }
            $tuples[] = "(" . implode(",", $values) . ")";
        }
        // Nothing valid in this batch: do not send an INSERT without rows.
        if (!$tuples) {
            continue;
        }
        // NOTE(review): the statement is still assembled as text because the
        // API behind D() is not visible here; prefer bound parameters if the
        // model layer offers them.
        D()->query($sqlHead . implode(",", $tuples), 'insert');
    }
}

/**
 * Parses one ISO 2709 record into the columns used by collectionImport().
 *
 * Layout relied upon: a 24-byte leader (bytes 0-4 record length, bytes 12-16
 * base address of data), followed by a directory of 12-byte entries
 * (3-byte tag, 4-byte field length, 5-byte start offset) closed by one
 * terminator byte, followed by the data area.
 *
 * @param string $record Raw record bytes (GBK), optionally ending in "\r".
 * @return array|null Column name => UTF-8 value, or null when the line is not
 *                    a usable record.
 */
function collectionImportParseRecord($record)
{
    $record = rtrim($record, "\r");
    if (strlen($record) < 24) {
        return null;
    }
    $lengthText = substr($record, 0, 5);
    $baseText = substr($record, 12, 5);
    if (!preg_match('/^\d{5}$/', $lengthText) || !preg_match('/^\d{5}$/', $baseText)) {
        return null;
    }
    $length = (int) $lengthText;
    $base = (int) $baseText;
    // The directory ends with one terminator byte, so data cannot start before 25.
    if ($base < 25 || $length <= $base) {
        return null;
    }

    $directory = (string) substr($record, 24, $base - 25);
    $data = (string) substr($record, $base, $length - $base);

    // Built fresh for every record so entries of a previous, longer record
    // can never leak into this one. A repeated tag keeps its last occurrence.
    $fields = array();
    foreach (str_split($directory, 12) as $entry) {
        if (strlen($entry) < 12) {
            continue;
        }
        $fields[substr($entry, 0, 3)] = (string) substr(
            $data,
            (int) substr($entry, 7, 5),
            (int) substr($entry, 3, 4)
        );
    }

    // Array union supplies a default for every subfield that is read below.
    $title = collectionImportSubfields($fields, '200') + array('a' => '', 'f' => '');
    $isbn = collectionImportSubfields($fields, '010') + array('a' => '', 'd' => '');
    $publication = collectionImportSubfields($fields, '210') + array('a' => '', 'c' => '', 'd' => '');
    $classification = collectionImportSubfields($fields, '690') + array('a' => '');
    $physical = collectionImportSubfields($fields, '215') + array('a' => '', 'd' => null);
    $summary = collectionImportSubfields($fields, '330') + array('a' => '');
    $language = collectionImportSubfields($fields, '101') + array('a' => '');

    $row = array(
        'sjbm' => $title['a'],
        'zzxx' => $title['f'],
        'bzISBN' => str_replace('-', '', $isbn['a']),
        // Price: drop letters and commas, e.g. a currency prefix.
        'sjjg' => (string) preg_replace("/[a-z,A-Z]/", "", $isbn['d']),
        'cbdd' => $publication['a'],
        'cbsm' => $publication['c'],
        // Only the first four characters (the year) are kept.
        'cbrq' => (string) substr($publication['d'], 0, 4),
        'ztfl' => $classification['a'],
        'sjym' => str_replace('-', '', $physical['a']),
        // Size: strip letters/commas and append the unit, only when $d exists.
        'sjkb' => $physical['d'] === null
            ? ''
            : (preg_replace("/[a-z,A-Z]/", "", $physical['d']) . 'cm'),
        'nrjj' => str_replace('-', '', $summary['a']),
        // Parsed from field 101 but not part of the INSERT column list.
        'ssyz' => str_replace('-', '', $language['a']),
    );

    // Important: the source data is GBK; convert to UTF-8 to avoid mojibake.
    foreach ($row as $key => $value) {
        $converted = iconv("GBK", "UTF-8", $value);
        $row[$key] = $converted === false ? '' : $converted;
    }

    // Never populated from the record in this trimmed-down version.
    $row['sjbh'] = '';
    $row['zch'] = '';
    $row['rksj'] = date("Y-m-d H:i:s");
    $row['ssh'] = $row['ztfl'] . "/" . $row['zch'];

    return $row;
}

/**
 * Splits one variable field into its subfields.
 *
 * Subfields are introduced by the 0x1F delimiter followed by a one-byte code;
 * the part before the first delimiter (the indicators) is discarded. Trailing
 * field/record terminators (0x1E / 0x1D) are removed from each value. When a
 * code repeats, the last occurrence wins.
 *
 * @param array  $fields Tag => raw field bytes.
 * @param string $tag    Three-character field tag.
 * @return array Subfield code => value; empty when the tag is absent.
 */
function collectionImportSubfields($fields, $tag)
{
    $subfields = array();
    if (!isset($fields[$tag])) {
        return $subfields;
    }
    $pieces = explode("\x1F", $fields[$tag]);
    array_shift($pieces);
    foreach ($pieces as $piece) {
        if ($piece === '') {
            continue;
        }
        $subfields[$piece[0]] = rtrim((string) substr($piece, 1), "\x1E\x1D");
    }
    return $subfields;
}

/**
 * Makes a value safe to place between single quotes in the INSERT statement.
 *
 * Single quotes are removed and backslashes are doubled so that a value
 * (for instance one ending in "\") cannot break out of its quoted literal.
 * NOTE(review): the doubling assumes the server treats backslash as an escape
 * character (MySQL without NO_BACKSLASH_ESCAPES) — confirm for the target DB.
 *
 * @param string $value
 * @return string
 */
function collectionImportSqlValue($value)
{
    return str_replace(array("\\", "'"), array("\\\\", ""), (string) $value);
}