Angry1947 2020-11-09 17:02:01

php,解析marc数据

	/**
	 * 总的来说,这个功能很简单:读取文件,循环解析并拼接字符串,然后存入数据库,或者写入其他地方。
	 */



	/** 注:  启示帖子:https://blog.csdn.net/chaishen10000/article/details/79245055 */


	/**
	 * 虽然写得很垃圾,但代码确实是原创;有具体疑问可以联系我
	 * 因为是闲来无聊摘出来的,删减了一些业务逻辑,需要的自行增加
	 * 重要的地方都用注释标记了,注意重点查看
	 * 框架选用 thinkphp
	 * @赵
	 * @email  lx5231616@163.com
	 */


	//数据导入  解析上传到服务器的 marc(.IOS)文件
	/**
	 * Import bibliographic records from a MARC (ISO 2709) file into `tbookinfo`.
	 *
	 * Reads ./test.IOS, splits it into records, decodes each record's leader,
	 * directory and data area, extracts a handful of subfields, converts them
	 * from GBK to UTF-8 and writes them with one multi-row INSERT per batch of
	 * 5000 records.
	 *
	 * Record layout relied on (ISO 2709):
	 *   - bytes 0-23: leader; bytes 12-16 hold the base address of the data area
	 *   - bytes 24 .. base-2: directory, 12 bytes per entry
	 *     (3-byte tag, 4-byte field length, 5-byte start offset)
	 *   - 0x1F introduces a subfield, 0x1E ends a field, 0x1D ends a record
	 *
	 * Records that are too short to hold a leader and one directory entry are
	 * skipped. The script is terminated with "Unable to open file!" when the
	 * file cannot be read.
	 *
	 * @return void
	 */
	function collectionImport()
	{
		ini_set("memory_limit", "1024M");
		set_time_limit(0);

		$file_path = "./test.IOS";
		$batchSize = 5000;

		// Single source of truth for the INSERT: the column list and every value
		// tuple are generated from this array so their counts can never diverge.
		// Parsed values that are not listed here (e.g. 'ssyz') are simply not written;
		// append the name here if the table has a matching column.
		$columns = array(
			'sjbm', 'zzxx', 'bzISBN', 'sjjg', 'cbdd', 'cbsm', 'cbrq', 'ztfl',
			'sjym', 'sjkb', 'nrjj', 'sjbh', 'rksj', 'zch', 'ssh',
		);

		header("content-type:text/html;charset=GBK");       // declare the page encoding to avoid garbled output

		// Read the whole file in one call: no handle is left open, and an empty
		// file yields '' instead of failing on a zero-length fread().
		$contents = file_get_contents($file_path);
		if ($contents === false) {
			die("Unable to open file!");
		}

		// GBK -> UTF-8; an undecodable value becomes '' instead of boolean false.
		$toUtf8 = function ($text) {
			$converted = iconv("GBK", "UTF-8", $text);
			return $converted === false ? '' : $converted;
		};

		// Split one variable field into [subfield code => UTF-8 value].
		// The split is done on the raw bytes (0x1F can never be part of a GBK
		// character) and each value is converted afterwards, so later string
		// operations never cut a multibyte character in half.
		// When a code repeats, the last occurrence wins.
		$subfieldsOf = function (array $fields, $tag) use ($toUtf8) {
			$parsed = array();
			if (!isset($fields[$tag]) || $fields[$tag] === '') {
				return $parsed;
			}
			// Drop the field terminator that the directory length includes.
			$pieces = explode("\x1f", rtrim($fields[$tag], "\x1e\x1d"));
			array_shift($pieces);           // first piece holds the indicators, not a subfield
			foreach ($pieces as $piece) {
				if ($piece === '') {
					continue;
				}
				$parsed[$piece[0]] = $toUtf8((string) substr($piece, 1));
			}
			return $parsed;
		};

		// Value of one subfield, or '' when it is absent.
		$value = function (array $subfields, $code) {
			return isset($subfields[$code]) ? $subfields[$code] : '';
		};

		// Decode one raw record into a name => value row, or null when the record is malformed.
		$parseRecord = function ($record) use ($subfieldsOf, $value) {
			if (strlen($record) < 24) {
				return null;
			}

			$baseAddress = (int) substr($record, 12, 5);
			// The directory sits between the 24-byte leader and the field
			// terminator that precedes the data area.
			$directoryLength = $baseAddress - 25;
			if ($directoryLength < 12 || strlen($record) <= $baseAddress) {
				return null;
			}

			$directory = substr($record, 24, $directoryLength);
			$dataArea = substr($record, $baseAddress);

			// Rebuilt for every record so entries of a previous record cannot leak in.
			// When a tag repeats, the last occurrence wins.
			$fields = array();
			for ($offset = 0; $offset + 12 <= $directoryLength; $offset += 12) {
				$tag = substr($directory, $offset, 3);
				$fieldLength = (int) substr($directory, $offset + 3, 4);
				$fieldStart = (int) substr($directory, $offset + 7, 5);
				$fields[$tag] = (string) substr($dataArea, $fieldStart, $fieldLength);
			}

			// Tag meanings follow the original variable names
			// (010 ISBN, 101 language, 200 title, 210 publication, 215 physical
			// description, 330 summary, 690 classification).
			$isbn = $subfieldsOf($fields, '010');
			$language = $subfieldsOf($fields, '101');
			$title = $subfieldsOf($fields, '200');
			$publication = $subfieldsOf($fields, '210');
			$physical = $subfieldsOf($fields, '215');
			$summary = $subfieldsOf($fields, '330');
			$classification = $subfieldsOf($fields, '690');

			$row = array(
				'sjbm' => $value($title, 'a'),
				'zzxx' => $value($title, 'f'),
				'bzISBN' => str_replace('-', '', $value($isbn, 'a')),
				// Keep only the numeric part of the price (letters and commas removed).
				'sjjg' => (string) preg_replace("/[a-z,A-Z]/", "", $value($isbn, 'd')),
				'cbdd' => $value($publication, 'a'),
				'cbsm' => $value($publication, 'c'),
				// First four characters of the publication date.
				'cbrq' => (string) iconv_substr($value($publication, 'd'), 0, 4, 'UTF-8'),
				'ztfl' => $value($classification, 'a'),
				'sjym' => str_replace('-', '', $value($physical, 'a')),
				'sjkb' => isset($physical['d'])
					? preg_replace("/[a-z,A-Z]/", "", $physical['d']) . 'cm'
					: '',
				'nrjj' => str_replace('-', '', $value($summary, 'a')),
				'sjbh' => '',
				'rksj' => date("Y-m-d H:i:s"),
				// Not derivable from the record; left empty for the caller's business logic.
				'zch' => '',
				'ssyz' => str_replace('-', '', $value($language, 'a')),
			);
			$row['ssh'] = $row['ztfl'] . "/" . $row['zch'];

			return $row;
		};

		// Render a value as a single-quoted SQL literal. Quotes are removed as
		// before; backslashes are removed too, because a trailing backslash
		// would otherwise escape the closing quote.
		// NOTE(review): prefer bound parameters if the database layer offers them.
		$quote = function ($text) {
			return "'" . str_replace(array("\\", "'"), '', (string) $text) . "'";
		};

		// Records are separated by a newline and/or the 0x1D record terminator.
		$records = preg_split('/\x1d?\r?\n|\x1d/', $contents, -1, PREG_SPLIT_NO_EMPTY);
		if ($records === false) {
			$records = explode("\n", $contents);
		}
		unset($contents);

		$insertHead = "insert into tbookinfo (`" . implode('`,`', $columns) . "`) values";

		foreach (array_chunk($records, $batchSize) as $batch) {
			$tuples = array();

			foreach ($batch as $record) {
				$row = $parseRecord($record);
				if ($row === null) {
					continue;
				}

				$cells = array();
				foreach ($columns as $column) {
					$cells[] = $quote($row[$column]);
				}
				$tuples[] = "(" . implode(",", $cells) . ")";
			}

			// Nothing usable in this batch: do not send an INSERT without values.
			if (!$tuples) {
				continue;
			}

			D()->query($insertHead . implode(",", $tuples), 'insert');
		}

	}

评论

Home - Wiki
Copyright © 2011-2024 iteam. Current version is 2.139.0. UTC+08:00, 2024-12-30 01:55
浙ICP备14020137号-1 $Map of visitor$