{"id":390,"date":"2020-03-24T16:00:05","date_gmt":"2020-03-24T07:00:05","guid":{"rendered":"https:\/\/www.dh.ku-orcas.kansai-u.ac.jp\/?p=390"},"modified":"2020-06-24T14:54:57","modified_gmt":"2020-06-24T05:54:57","slug":"python%e3%82%92%e4%bd%bf%e3%81%a3%e3%81%a6%e9%9b%bb%e5%ad%90%e5%9b%b3%e6%9b%b8%e9%a4%a8%e3%81%8b%e3%82%89%e6%8c%bf%e7%b5%b5%e3%81%ae%e3%83%9a%e3%83%bc%e3%82%b8%e3%82%92%e6%8a%bd%e5%87%ba%e3%81%99","status":"publish","type":"post","link":"https:\/\/www.dh.ku-orcas.kansai-u.ac.jp\/?p=390","title":{"rendered":"Python\u3092\u4f7f\u3063\u3066\u96fb\u5b50\u56f3\u66f8\u9928\u304b\u3089\u633f\u7d75\u306e\u30da\u30fc\u30b8\u3092\u62bd\u51fa\u3059\u308b"},"content":{"rendered":"\n<hr class=\"wp-block-separator\"\/>\n\n\n\n<p><\/p>\n\n\n\n<p style=\"background-color:#fcf8e3;color:#8a6d3b\" class=\"has-text-color has-background\"><strong>Stephen Krewson<\/strong><\/p>\n\n\n\n<p style=\"background-color:#fcf8e3;color:#8a6d3b\" class=\"has-text-color has-background\">HathiTrust\u3068Internet Archive\u306b\u3088\u308b\u6a5f\u68b0\u5b66\u7fd2\u3068API\u306e\u62e1\u5f35\u6a5f\u80fd\u306b\u3088\u3063\u3066\u3001\u30c7\u30b8\u30bf\u30eb\u5316\u8cc7\u6599\u306e\u306a\u304b\u304b\u3089\u3001\u8996\u899a\u7684\u306b\u95a2\u5fc3\u306e\u3042\u308b\u30da\u30fc\u30b8\u9818\u57df\u3092\u7c21\u5358\u306b\u62bd\u51fa\u3067\u304d\u308b\u3088\u3046\u306b\u306a\u308a\u307e\u3057\u305f\u3002\u3053\u306e\u30ec\u30c3\u30b9\u30f3\u3067\u306f\u3001\u3053\u308c\u3089\u306e\u9818\u57df\u3092\u52b9\u7387\u3088\u304f\u62bd\u51fa\u3057\u3001\u305d\u308c\u306b\u3088\u3063\u3066\u65b0\u3057\u3044\u8996\u899a\u7684\u306a\u7814\u7a76\u8ab2\u984c\u3078\u53d6\u308a\u7d44\u3080\u65b9\u6cd5\u3092\u7d39\u4ecb\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<h2 class=\"wp-block-heading\">\u76ee\u6b21<\/h2>\n\n\n\n<ul><li><a href=\"#overview\">\u6982\u8981<\/a><\/li><li><a href=\"#goals\">\u76ee\u6a19<\/a><\/li><li><a href=\"#requirements\">\u5fc5\u8981\u306a\u8981\u4ef6<\/a><\/li><li><a 
href=\"#setup\">\u30bb\u30c3\u30c8\u30a2\u30c3\u30d7<\/a><ul><li><a href=\"#dependencies\">\u4f9d\u5b58\u95a2\u4fc2<\/a><\/li><li><a href=\"#lesson-files\">\u30ec\u30c3\u30b9\u30f3\u30d5\u30a1\u30a4\u30eb<\/a><ul><li><a href=\"#download-destination\">\u30c0\u30a6\u30f3\u30ed\u30fc\u30c9\u5148<\/a><\/li><\/ul><\/li><li><a href=\"#anaconda-optional\">Anaconda\uff08\u30aa\u30d7\u30b7\u30e7\u30f3\uff09<\/a><ul><li><a href=\"#create-an-environment\">\u74b0\u5883\u8a2d\u5b9a<\/a><\/li><li><a href=\"#install-conda-packages\">Conda\u30d1\u30c3\u30b1\u30fc\u30b8\u306e\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb<\/a><\/li><li><a href=\"#install-pip-packages\">Pip\u30d1\u30c3\u30b1\u30fc\u30b8\u306e\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb<\/a><\/li><\/ul><\/li><li><a href=\"#jupyter-notebooks\">Jupyter Notebook<\/a><\/li><\/ul><\/li><li><a href=\"#hathitrust\">HathiTrust<\/a><ul><li><a href=\"#api-access\">API\u30a2\u30af\u30bb\u30b9<\/a><\/li><li><a href=\"#create-volume-list\">\u8cc7\u6599\u30ea\u30b9\u30c8\u306e\u4f5c\u6210<\/a><\/li><li><a href=\"#visual-feature-image_on_page\">\u8996\u899a\u7684\u306a\u7279\u5fb4\uff1aIMAGE_ON_PAGE<\/a><\/li><li><a href=\"#code-walk-through\">\u30b3\u30fc\u30c9\u30fb\u30a6\u30a9\u30fc\u30af\u30b9\u30eb\u30fc<\/a><ul><li><a href=\"#find-pictures\">\u753b\u50cf\u3092\u898b\u3064\u3051\u308b<\/a><\/li><li><a href=\"#download-images\">\u753b\u50cf\u3092\u30c0\u30a6\u30f3\u30ed\u30fc\u30c9\u3059\u308b<\/a><\/li><\/ul><\/li><\/ul><\/li><li><a href=\"#internet-archive\">Internet Archive<\/a><ul><li><a href=\"#api-access-1\">API\u30a2\u30af\u30bb\u30b9<\/a><\/li><li><a href=\"#create-volume-list-1\">\u8cc7\u6599\u30ea\u30b9\u30c8\u306e\u4f5c\u6210<\/a><\/li><li><a href=\"#visual-feature-picture-blocks\">\u8996\u899a\u7684\u306a\u7279\u5fb4\uff1aPicture\u30d6\u30ed\u30c3\u30af<\/a><\/li><li><a href=\"#code-walk-through-1\">\u30b3\u30fc\u30c9\u30fb\u30a6\u30a9\u30fc\u30af\u30b9\u30eb\u30fc<\/a><ul><li><a 
href=\"#find-pictures-1\">\u753b\u50cf\u3092\u898b\u3064\u3051\u308b<\/a><\/li><li><a href=\"#download-images-1\">\u753b\u50cf\u3092\u30c0\u30a6\u30f3\u30ed\u30fc\u30c9\u3059\u308b<\/a><\/li><\/ul><\/li><\/ul><\/li><li><a href=\"#next-steps\">\u3053\u306e\u5f8c\u306e\u30b9\u30c6\u30c3\u30d7<\/a><\/li><\/ul>\n\n\n\n<h2 class=\"wp-block-heading\" id=\"overview\">\u6982\u8981<\/h2>\n\n\n\n<p>\u3082\u3057\u672c\u306e\u4e2d\u306e\u633f\u7d75\u3060\u3051\u898b\u305f\u304b\u3063\u305f\u3089\u3001\u3069\u3046\u3059\u308c\u3070\u3044\u3044\u3060\u308d\u3046\uff1f\u3053\u306e\u7591\u554f\u306f\u3001\u5c0f\u3055\u3044\u5b50\u4f9b\u3082\u5927\u4eba\u306e\u7814\u7a76\u8005\u3082\u601d\u3044\u3064\u304f\u3082\u306e\u3067\u3059\u3002\u672c\u304c\u96fb\u5b50\u56f3\u66f8\u9928\u304b\u3089\u5165\u624b\u53ef\u80fd\u3060\u3068\u5206\u304b\u3063\u3066\u3044\u308b\u3068\u3059\u308c\u3070\u3001\u633f\u7d75\u306e\u3042\u308b\u30da\u30fc\u30b8\u3060\u3051\u3092\u30c0\u30a6\u30f3\u30ed\u30fc\u30c9\u3057\u3066\u4ed6\u306e\u30da\u30fc\u30b8\u3092\u7121\u8996\u3067\u304d\u308b\u3068\u3042\u308a\u304c\u305f\u3044\u3067\u3057\u3087\u3046\u3002<\/p>\n\n\n\n<p>\u4ee5\u4e0b\u306f\u3001HathiTrust\u3067\u8b58\u5225\u5b50<font style=\"color:#ff0000\" class=\"has-text-color\">osu.32435078698222<\/font>\u3092\u6301\u3064\u8cc7\u6599\u306e\u30da\u30fc\u30b8\u30b5\u30e0\u30cd\u30a4\u30eb\u3067\u3059\u3002\u3053\u306e\u30ec\u30c3\u30b9\u30f3\u30d7\u30ed\u30bb\u30b9\u306e\u5f8c\u3001\u633f\u7d75\u306e\u3042\u308b\u30da\u30fc\u30b8\u306e\u307f\uff08\u5408\u8a0831\u30da\u30fc\u30b8\uff09\u304cJPEG\u30d5\u30a1\u30a4\u30eb\u3068\u3057\u3066\u30d5\u30a9\u30eb\u30c0\u306b\u30c0\u30a6\u30f3\u30ed\u30fc\u30c9\u3067\u304d\u307e\u3057\u305f\u3002<\/p>\n\n\n\n<div class=\"wp-block-image\"><figure class=\"aligncenter\"><img decoding=\"async\" loading=\"lazy\" width=\"1268\" height=\"427\" src=\"https:\/\/www.dh.ku-orcas.kansai-u.ac.jp\/wp-content\/uploads\/file-explorer-example.png\" alt=\"\" 
class=\"wp-image-437\"\/><figcaption>  \u633f\u7d75\u306e\u3042\u308b\u30da\u30fc\u30b8\u306e\u307f\u3092\u30c0\u30a6\u30f3\u30ed\u30fc\u30c9\u3057\u305f\u69d8\u5b50 <\/figcaption><\/figure><\/div>\n\n\n\n<p>\u300c\u633f\u7d75\u306e\u306a\u3044\u300d\u30da\u30fc\u30b8\u304c\u3069\u308c\u3060\u3051\u53d6\u308a\u9664\u304b\u308c\u305f\u306e\u304b\u3092\u898b\u308b\u306b\u306f\u3001\u5150\u7ae5\u6587\u5b66\u306e\u30d9\u30b9\u30c8\u30bb\u30e9\u30fc\u3001Samuel Griswold Goodrich\u306e\u300e\u30d4\u30fc\u30bf\u30fc\u30fb\u30d1\u30fc\u30ec\u30fc\u306e\u30a2\u30e1\u30ea\u30ab\u7269\u8a9e\uff08<em>Tales of Peter Parley about America<\/em>\uff09\u300f\uff081827\u5e74\u521d\u7248\uff09\u306e1845\u5e74\u6539\u8a02\u7248<a href=\"https:\/\/babel.hathitrust.org\/cgi\/pt?id=osu.32435078698222&amp;view=thumb&amp;seq=1\">\u5168148\u30da\u30fc\u30b8\u306e\u30b5\u30e0\u30cd\u30a4\u30eb<\/a>\u3068\u6bd4\u8f03\u3057\u3066\u307f\u308b\u3068\u3088\u3044\u3067\u3057\u3087\u3046\u3002<\/p>\n\n\n\n<div class=\"wp-block-image\"><figure class=\"aligncenter\"><img decoding=\"async\" loading=\"lazy\" width=\"1921\" height=\"1073\" src=\"https:\/\/www.dh.ku-orcas.kansai-u.ac.jp\/wp-content\/uploads\/parley-full-thumbnails.png\" alt=\"\" class=\"wp-image-438\"\/><figcaption>  <em>HathiTrust<\/em>\u3067\u306e\u5168\u30da\u30fc\u30b8\u306e\u30b5\u30e0\u30cd\u30a4\u30eb\u306e\u69d8\u5b50 <\/figcaption><\/figure><\/div>\n\n\n\n<p>\u3053\u306e\u30ec\u30c3\u30b9\u30f3\u306f\u3001\u4e16\u754c\u6700\u5927\u7d1a\u306e2\u3064\u306e\u96fb\u5b50\u56f3\u66f8\u9928\u3001HathiTrust (HT)\u3068Internet Archive (IA)\u304c\u516c\u958b\u3057\u3066\u3044\u308b\u30d1\u30d6\u30ea\u30c3\u30af\u30c9\u30e1\u30a4\u30f3\u306e\u66f8\u7c4d\u304b\u3089\u3001\u3053\u306e\u3088\u3046\u306a\u30da\u30fc\u30b8\u306e\u62bd\u51fa\u3068\u30c0\u30a6\u30f3\u30ed\u30fc\u30c9\u3092\u884c\u3046\u305f\u3081\u306e\u624b\u6cd5\u3092\u7d39\u4ecb\u3057\u307e\u3059\u3002\u633f\u7d75\u3068\u672c\u306e\u30ec\u30a4\u30a2\u30a6\u30c8(<em>mise en 
page<\/em>)\u306e\u6b74\u53f2\u3092\u77e5\u308b\u305f\u3081\u306b\u753b\u50cf\u30b3\u30fc\u30d1\u30b9\u3092\u4f5c\u308a\u305f\u3044\u4eba\u306b\u3068\u3063\u3066\u306f\u3001\u8208\u5473\u3042\u308b\u3082\u306e\u3068\u306a\u308b\u3067\u3057\u3087\u3046\u3002<a href=\"https:\/\/ebba.english.ucsb.edu\/\">EBBA<\/a>\u3068<a rel=\"noreferrer noopener\" href=\"http:\/\/projectaida.org\/\" target=\"_blank\">AIDA<\/a>\u306e\u5148\u99c6\u7684\u53d6\u308a\u7d44\u307f\u306b\u3064\u3065\u3044\u3066\u3001\u30c7\u30b8\u30bf\u30eb\u66f8\u8a8c\u5b66\u3078\u306e\u8996\u899a\u7684\u30a2\u30d7\u30ed\u30fc\u30c1\u304c\u4eba\u6c17\u3092\u96c6\u3081\u3066\u3044\u307e\u3059\u3002\u6700\u8fd1\u5b8c\u4e86\u3057\u305f\u3001\u3042\u308b\u3044\u306f\u3001\u7814\u7a76\u8cc7\u91d1\u3092\u5f97\u305f2\u3064\u306e\u30d7\u30ed\u30b8\u30a7\u30af\u30c8\u306e<a href=\"https:\/\/www.neh.gov\/divisions\/odh\/grant-news\/announcing-new-2017-odh-grant-awards\">\u4f8b<\/a>\u3092\u6319\u3052\u308b\u3068\u3001<a href=\"https:\/\/web.archive.org\/web\/20190526050917\/http:\/\/culturalanalytics.org\/2018\/12\/detecting-footnotes-in-32-million-pages-of-ecco\/\">\u811a\u6ce8\u306e\u7279\u5b9a<\/a>\u3084<a 
href=\"http:\/\/www.ccs.neu.edu\/home\/dasmith\/ichneumon-proposal.pdf\">\u508d\u6ce8\u306e\u8ffd\u8de1<\/a>\u306e\u65b9\u6cd5\u3092\u958b\u767a\u3059\u308b\u3082\u306e\u3082\u3042\u308a\u307e\u3059\u3002<\/p>\n\n\n\n<p>\u79c1\u81ea\u8eab\u306e\u7814\u7a76\u306f\u300119\u4e16\u7d00\u306e\u533b\u5b66\u66f8\u3084\u6559\u80b2\u66f8\u306e\u633f\u7d75\u306e\u983b\u5ea6\u3084\u69d8\u5f0f\u306e\u5909\u5316\u306b\u95a2\u3057\u3066\u7d4c\u9a13\u7684\u306a\u554f\u984c\u306b\u7b54\u3048\u3088\u3046\u3068\u3059\u308b\u3082\u306e\u3067\u3059\u3002\u3053\u308c\u306b\u306f\u3001\u672c\u3054\u3068\u306e\u633f\u7d75\u306e\u6570\u3092\u96c6\u8a08\u3057\u3001\u3053\u308c\u3089\u306e\u633f\u7d75\u3092\u4f5c\u308b\u306e\u306b\u3069\u306e\u3088\u3046\u306a\u5370\u5237\u5de5\u7a0b\u304c\u884c\u308f\u308c\u305f\u306e\u304b\u3092\u63a8\u5bdf\u3059\u308b\u5fc5\u8981\u304c\u3042\u308a\u307e\u3059\u3002\u633f\u7d75\u306e\u30da\u30fc\u30b8\u62bd\u51fa\u306e\u304a\u305d\u3089\u304f\u3088\u308a\u7684\u78ba\u306a\u30e6\u30fc\u30b9\u30b1\u30fc\u30b9\u306f\u3001\u540c\u3058\u672c\u306e<a 
href=\"https:\/\/www.cambridge.org\/core\/books\/cambridge-companion-to-robinson-crusoe\/iconic-crusoe-illustrations-and-images-of-robinson-crusoe\/B83352C33FB1A9929A856FFA8E2D0CD0\/core-reader\">\u7570\u306a\u308b\u7248<\/a>\u3067\u306e\u633f\u7d75\u306e\u5bfe\u7167\u8abf\u67fb\u3067\u3059\u3002\u4eca\u5f8c\u306e\u7814\u7a76\u3067\u306f\u3001\u62bd\u51fa\u3057\u305f\u753b\u50cf\u306e\u8996\u899a\u7684\u306a\u7279\u5fb4\u3084\u300c\u610f\u5473\u300d\uff08\u8272\u3001\u5927\u304d\u3055\u3001\u30c6\u30fc\u30de\u3001\u30b8\u30e3\u30f3\u30eb\u3001\u633f\u7d75\u306e\u6570\u306a\u3069\uff09\u3092\u6709\u76ca\u306b\u8abf\u67fb\u3059\u308b\u3053\u3068\u306b\u306a\u308b\u3067\u3057\u3087\u3046\u3002<\/p>\n\n\n\n<p>\u95a2\u5fc3\u306e\u3042\u308b\u9818\u57df\u306e\u306a\u304b\u304b\u3089\u3055\u3089\u306b\u7d5e\u308a\u8fbc\u3093\u3067\u60c5\u5831\u3092\u629c\u304d\u51fa\u3059\u306b\u306f\u3001\u304b\u306a\u308a\u306e\u6a5f\u68b0\u5b66\u7fd2\u3092\u5fc5\u8981\u3068\u3059\u308b\u904e\u7a0b\u306a\u306e\u3067\u3001\u3053\u306e\u30ec\u30c3\u30b9\u30f3\u306e\u7bc4\u56f2\u3092\u8d85\u3048\u3066\u3044\u307e\u3059\u3002\u3057\u304b\u3057\u3001\u633f\u7d75\u304c\u3042\u308b\uff08\u3042\u308b\u3044\u306f\u306a\u3044\uff09\u30da\u30fc\u30b8\u306eYes\/No\u5206\u985e\u306f\u3001\u5bfe\u8c61\u30b3\u30ec\u30af\u30b7\u30e7\u30f3\u306e\u305d\u308c\u305e\u308c\u306e\u672c\u306e\u300c\u3059\u3079\u3066\u306e\u300d\u30da\u30fc\u30b8\u3001\u3068\u3044\u3046\u81a8\u5927\u306a\u30b9\u30da\u30fc\u30b9\u3092\u72ed\u3081\u308b\u3046\u3048\u3067\u306e\u6700\u521d\u306e\u5b9f\u7528\u7684\u306a\u30b9\u30c6\u30c3\u30d7\u3067\u3042\u308a\u3001\u305d\u308c\u306b\u3088\u308a\u633f\u7d75\u306e\u7d5e\u308a\u8fbc\u307f\uff08localization\uff09\u3092\u53ef\u80fd\u3068\u3059\u308b\u3082\u306e\u3067\u3059\u3002\u53c2\u8003\u3068\u3057\u3066\u300119\u4e16\u7d00\u306e\u533b\u5b66\u66f8\u3067\u306f\uff08\u5e73\u5747\u3057\u3066\uff09\u30da\u30fc\u30b8\u306e1\uff5e3%\u306b\u633f\u7d75\u304c\u3042\u308a\u307e\u3059\u3002\
u3064\u307e\u308a\u3001\u4eee\u306b\u524d\u63d0\u77e5\u8b58\u304c\u306a\u3044\u96fb\u5b50\u56f3\u66f8\u9928\u30b3\u30fc\u30d1\u30b9\u5185\u306e\u633f\u7d75\u3092\u7814\u7a76\u3057\u3088\u3046\u3068\u3059\u308b\u3068\u3001\u30b3\u30fc\u30d1\u30b9\u306e90%\u4ee5\u4e0a\u306e\u30da\u30fc\u30b8\u306b\u306f\u633f\u7d75\u304c\u300c\u306a\u3044\u300d\u3068\u4eee\u5b9a\u3067\u304d\u308b\u306e\u3067\u3059\u3002<\/p>\n\n\n\n<p>HathiTrust\u3068Internet Archive\u306f\u3001\u5149\u5b66\u7684\u6587\u5b57\u8a8d\u8b58\u30bd\u30d5\u30c8\u30a6\u30a7\u30a2\uff08OCR\u306f\u7d19\u306e\u672c\u3092\u30b9\u30ad\u30e3\u30f3\u3057\u305f\u5f8c\u306b\u65bd\u3055\u308c\u3001\u3057\u3070\u3057\u3070\u30ce\u30a4\u30ba\u306e\u591a\u3044\u30c6\u30ad\u30b9\u30c8\u30c7\u30fc\u30bf\u304c\u4f5c\u3089\u308c\u307e\u3059\uff09\u3092\u4f7f\u3063\u3066\u751f\u6210\u3055\u308c\u305f\u30c7\u30fc\u30bf\u3092\u89e3\u6790\u3059\u308b\u3053\u3068\u3067\u3001\u633f\u7d75\u306e\u3042\u308b\u306a\u3057\u306e\u554f\u984c\u306b\u7b54\u3048\u308b\u3053\u3068\u304c\u53ef\u80fd\u3067\u3059\u3002OCR\u306b\u3088\u308b\u30c7\u30fc\u30bf\u3092\u6d3b\u7528\u3057\u3066\u633f\u7d75\u306e\u30da\u30fc\u30b8\u3092\u898b\u3064\u3051\u308b\u3068\u3044\u3046\u306e\u306f\u3001Kalev Leetaru\u304c2014\u5e74\u306bInternet Archive\u3068Flickr\u3068\u306e\u5171\u540c\u7814\u7a76\u3067\u6700\u521d\u306b\u63d0\u6848\u3057\u305f\u3082\u306e\u3067\u3057\u305f\u3002\u3053\u306e\u30ec\u30c3\u30b9\u30f3\u3067\u306f\u3001Leetaru\u306e\u30a2\u30d7\u30ed\u30fc\u30c1\u3092HathiTrust\u306b\u79fb\u3057\u3001Python\u306eXML\u9ad8\u901f\u51e6\u7406\u30e9\u30a4\u30d6\u30e9\u30ea\u3068\u3001\u6700\u8fd1\u5e45\u5e83\u304f\u306a\u3063\u305fInternet 
Archive\u306e\u753b\u50cf\u30d5\u30a1\u30a4\u30eb\u5f62\u5f0f\u3092\u6d3b\u7528\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<p>HT\u3068IA\u306fOCR\u3067\u62bd\u51fa\u3057\u305f\u60c5\u5831\u3092\u5c11\u3057\u7570\u306a\u308b\u65b9\u5f0f\u3067\u516c\u958b\u3057\u3066\u3044\u308b\u305f\u3081\u3001\u5404\u30e9\u30a4\u30d6\u30e9\u30ea\u306e\u300c\u8996\u899a\u7684\u306a\u7279\u5fb4\u300d\u306e\u8a73\u7d30\u306b\u3064\u3044\u3066\u306f\u3001\u305d\u308c\u305e\u308c\u306e\u7bc0\u3067\u8ff0\u3079\u308b\u3053\u3068\u3068\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<h2 class=\"wp-block-heading\" id=\"goals\">\u76ee\u6a19<\/h2>\n\n\n\n<p>\u3053\u306e\u30ec\u30c3\u30b9\u30f3\u306e\u5f8c\u3001\u3042\u306a\u305f\u306f<\/p>\n\n\n\n<ul><li>Anaconda\u306e\u300c\u6700\u5c0f\u9650\u300d\u306ePython\u30c7\u30a3\u30b9\u30c8\u30ea\u30d3\u30e5\u30fc\u30b7\u30e7\u30f3\uff08Miniconda\uff09\u3092\u30bb\u30c3\u30c8\u30a2\u30c3\u30d7\u3057\u3066\u3001\u74b0\u5883\u3092\u69cb\u7bc9\u3067\u304d\u308b\u3088\u3046\u306b\u306a\u308a\u307e\u3059<\/li><li>\u691c\u7d22\u306b\u3088\u3063\u3066\u751f\u6210\u3055\u308c\u305fHT\u307e\u305f\u306fIA\u306e\u66f8\u7c4dID\u306e\u30ea\u30b9\u30c8\u3092\u4fdd\u5b58\u3057\u3001\u53cd\u5fa9\u51e6\u7406\u3092\u884c\u3046\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002<\/li><li>HT\u3068IA\u30c7\u30fc\u30bf\u306e\u30a2\u30d7\u30ea\u30b1\u30fc\u30b7\u30e7\u30f3\u30fb\u30d7\u30ed\u30b0\u30e9\u30de\u30fb\u30a4\u30f3\u30bf\u30fc\u30d5\u30a7\u30a4\u30b9\uff08API\uff09\u306bPython\u306e\u30e9\u30a4\u30d6\u30e9\u30ea\u3092\u901a\u3058\u3066\u30a2\u30af\u30bb\u30b9\u3067\u304d\u308b\u3088\u3046\u306b\u306a\u308a\u307e\u3059\u3002<\/li><li>\u30da\u30fc\u30b8\u30ec\u30d9\u30eb\u3067\u306e\u898b\u305f\u76ee\u306e\u7279\u6027\u3092\u898b\u3064\u3051\u308b\u3053\u3068\u304c\u3067\u304d\u308b\u3088\u3046\u306b\u306a\u308a\u307e\u3059\u3002<\/li><li>\u30d7\u30ed\u30b0\u30e9\u30df\u30f3\u30b0\u306b\u3088\u3063\u3066\u30da\u30fc\u30b8\u306eJPEG\u30d5\u30a1\u30a4\u30eb\u3092\u30c0\u30a6\u30f3\u30ed\
u30fc\u30c9\u304c\u3067\u304d\u308b\u3088\u3046\u306b\u306a\u308a\u307e\u3059\u3002<\/li><\/ul>\n\n\n\n<p>\u3088\u308a\u5927\u304d\u306a\u76ee\u6a19\u306f\u3001\u6b74\u53f2\u7684\u306a\u633f\u7d75\u306e\u30b3\u30fc\u30d1\u30b9\u3092\u4f5c\u6210\u3059\u308b\u3053\u3068\u3067\u3001\u30c7\u30fc\u30bf\u53ce\u96c6\u3068\u63a2\u7d22\u30b9\u30ad\u30eb\u3092\u9ad8\u3081\u308b\u3053\u3068\u306b\u3042\u308a\u307e\u3059\u3002\u753b\u50cf\u30c7\u30fc\u30bf\u3068\u66f8\u7c4d\u30e1\u30bf\u30c7\u30fc\u30bf\u3092\u5408\u308f\u305b\u308b\u3053\u3068\u3067\u3001\u6642\u9593\u3092\u901a\u3058\u3066\u8996\u899a\u7684\u306a\u5909\u5316\u3092\u8003\u5bdf\u3059\u308b\u3068\u3044\u3046\u610f\u7fa9\u306e\u3042\u308b\u7814\u7a76\u8ab2\u984c\u3092\u7acb\u3066\u308b\u3053\u3068\u304c\u53ef\u80fd\u306b\u306a\u308b\u3067\u3057\u3087\u3046\u3002<\/p>\n\n\n\n<h2 class=\"wp-block-heading\" id=\"requirements\">\u5fc5\u8981\u306a\u8981\u4ef6<\/h2>\n\n\n\n<p>\u3053\u306e\u30ec\u30c3\u30b9\u30f3\u306e\u30bd\u30d5\u30c8\u30a6\u30a7\u30a2\u8981\u4ef6\u306f\u6700\u5c0f\u9650\u306e\u3082\u306e\u3067\u3001\u6a19\u6e96\u7684\u306aOS\u304c\u52d5\u4f5c\u3059\u308b\u30b3\u30f3\u30d4\u30e5\u30fc\u30bf\u3068\u30a6\u30a7\u30d6\u30d6\u30e9\u30a6\u30b6\u3067\u3059\u3002Miniconda\u306f\u3001Windows\u3001macOS\u3001Linux\u306e\u305d\u308c\u305e\u308c32\u30d3\u30c3\u30c8\u300164\u30d3\u30c3\u30c8\u7248\u3067\u5229\u7528\u53ef\u80fd\u3067\u3059\u3002Python\n3\u304c\u73fe\u5728\u306e\u5b89\u5b9a\u7248\u3067\u3042\u308a\u3001\u7121\u671f\u9650\u3067\u30b5\u30dd\u30fc\u30c8\u3055\u308c\u3066\u3044\u307e\u3059\u3002<\/p>\n\n\n\n<p>\u3053\u306e\u30c1\u30e5\u30fc\u30c8\u30ea\u30a2\u30eb\u306f\u3001\u30b3\u30de\u30f3\u30c9\u30e9\u30a4\u30f3\u3068Python\u306e\u57fa\u790e\u77e5\u8b58\u3092\u524d\u63d0\u3068\u3057\u3066\u3044\u307e\u3059\u3002\u30b7\u30a7\u30eb\u30d9\u30fc\u30b9\u30c1\u30e5\u30fc\u30c8\u30ea\u30a2\u30eb\u306b\u304a\u3051\u308b\u30b3\u30e1\u30f3\u30c8\u3068\u30b3\u30de\u30f3\u30c9\u306e\u898f\u5247\u306e\u7406\u89
e3\u304c\u5fc5\u8981\u3067\u3059\u3002\u30b3\u30de\u30f3\u30c9\u30e9\u30a4\u30f3\u30b9\u30ad\u30eb\u306e\u30d6\u30e9\u30c3\u30b7\u30e5\u30a2\u30c3\u30d7\u306b\u306f\u3001Ian Milligan\u3068James Baker\u306b\u3088\u308b\u300c<a href=\"https:\/\/programminghistorian.org\/en\/lessons\/intro-to-bash\">Bash\u30b3\u30de\u30f3\u30c9\u30e9\u30a4\u30f3\u5165\u9580<\/a>\u300d\u3092\u304a\u52e7\u3081\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<h2 class=\"wp-block-heading\" id=\"setup\">\u30bb\u30c3\u30c8\u30a2\u30c3\u30d7<\/h2>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"dependencies\">\u4f9d\u5b58\u95a2\u4fc2<\/h3>\n\n\n\n<p>\u3088\u308a\u7d4c\u9a13\u8c4a\u5bcc\u306a\u8aad\u8005\u306f\u3001\u5358\u306b\u4f9d\u5b58\u95a2\u4fc2\u3092\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u3057\u3001\u9078\u629e\u3057\u305f\u74b0\u5883\u3067\u30ce\u30fc\u30c8\u30d6\u30c3\u30af\u3092\u5b9f\u884c\u3057\u305f\u3044\u304b\u3082\u3057\u308c\u307e\u305b\u3093\u3002\u79c1\u306eMiniconda\u306e\u30bb\u30c3\u30c8\u30a2\u30c3\u30d7\uff08\u304a\u3088\u3073Windows\u3068Unix\u7cfb\u306e\u9055\u3044\uff09\u306b\u3064\u3044\u3066\u306e\u8ffd\u52a0\u60c5\u5831\u3092\u63d0\u4f9b\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<ul><li>hathitrust-api (<a href=\"https:\/\/github.com\/rlmv\/hathitrust-api\">\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u306f\u3053\u3061\u3089<\/a>)<\/li><li>internetarchive (<a href=\"https:\/\/archive.org\/services\/docs\/api\/internetarchive\/\">\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u306f\u3053\u3061\u3089<\/a>)<\/li><li>jupyter (<a href=\"https:\/\/jupyter.org\/install\">\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u306f\u3053\u3061\u3089<\/a>)<\/li><li>requests (<a href=\"https:\/\/requests.readthedocs.io\/en\/master\/\">\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u306f\u3053\u3061\u3089<\/a>) [\u4f5c\u6210\u8005\u306f<font style=\"color:#ff0000\" class=\"has-text-color\">pipenv<\/font>\u3067\u306e\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u3092\u63a8\u5968\u3057\u3066\u3044\u307e\u3059\u3002<font style=\"color:#ff0000\" 
class=\"has-text-color\">pip<\/font>\u306b\u3064\u3044\u3066\u306f<a rel=\"noreferrer noopener\" href=\"https:\/\/pypi.org\/project\/requests2\/\" target=\"_blank\">PyPI<\/a>\u3092\u53c2\u7167\u3002]<\/li><\/ul>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"lesson-files\">\u30ec\u30c3\u30b9\u30f3\u30d5\u30a1\u30a4\u30eb<\/h3>\n\n\n\n<p>\u3053\u3061\u3089\u306e<a href=\"https:\/\/programminghistorian.org\/assets\/extracting-illustrated-pages\/lesson-files.zip\">\u5727\u7e2e\u30d5\u30a9\u30eb\u30c0<\/a>\u3092\u30c0\u30a6\u30f3\u30ed\u30fc\u30c9\u3057\u3066\u304f\u3060\u3055\u3044\u3002\u3053\u308c\u306f\u3001HT\u3068IA\u305d\u308c\u305e\u308c\u306e\u96fb\u5b50\u56f3\u66f8\u9928\u306b\u5bfe\u5fdc\u3059\u308b2\u3064\u306eJupyter Notebook\u3092\u542b\u3093\u3060\u3082\u306e\u3067\u3059\u3002\u307e\u305f\u3001\u30d5\u30a9\u30eb\u30c0\u306b\u306f\u3001HathiTrust\u306e\u30b3\u30ec\u30af\u30b7\u30e7\u30f3\u3092\u8a18\u8ff0\u3057\u305fJSON\u30e1\u30bf\u30c7\u30fc\u30bf\u306e\u30b5\u30f3\u30d7\u30eb\u3082\u542b\u307e\u308c\u3066\u3044\u307e\u3059\u3002\u89e3\u51cd\u3057\u3066\u4ee5\u4e0b\u306e\u30d5\u30a1\u30a4\u30eb\u304c\u3042\u308b\u3053\u3068\u3092\u78ba\u8a8d\u3057\u3066\u304f\u3060\u3055\u3044\uff1a&nbsp;<font style=\"color:#ff0000\" class=\"has-text-color\">554050894-1535834127.json,&nbsp;hathitrust.ipynb,&nbsp;internetarchive.ipynb.<\/font><\/p>\n\n\n\n<p style=\"background-color:#faf2cc;color:#8a6d3b\" class=\"has-text-color has-background\">\u4ee5\u4e0b\u306e\u5168\u3066\u306e\u30b3\u30de\u30f3\u30c9\u306f\u3001\u30ec\u30c3\u30b9\u30f3\u30d5\u30a1\u30a4\u30eb\u3092\u683c\u7d0d\u3057\u305f\u30d5\u30a9\u30eb\u30c0\u304c\u30ab\u30ec\u30f3\u30c8\u30c7\u30a3\u30ec\u30af\u30c8\u30ea\u3068\u306a\u3063\u3066\u3044\u308b\u3053\u3068\u3092\u60f3\u5b9a\u3057\u305f\u3082\u306e\u3067\u3059\u3002<\/p>\n\n\n\n<h4 class=\"wp-block-heading\" 
id=\"download-destination\">\u25b6\ufe0e\u30c0\u30a6\u30f3\u30ed\u30fc\u30c9\u5148<\/h4>\n\n\n\n<p>\u4ee5\u4e0b\u306f\u3001\u4e21\u65b9\u306e\u30ce\u30fc\u30c8\u30d6\u30c3\u30af\u306e\u3059\u3079\u3066\u306e\u30bb\u30eb\u304c\u5b9f\u884c\u3055\u308c\u308b\u3068\u4f5c\u6210\u3055\u308c\u308b\u3001\u30c7\u30d5\u30a9\u30eb\u30c8\u306e\u30c7\u30a3\u30ec\u30af\u30c8\u30ea\u3067\u3059\uff08\u63d0\u4f9b\u3055\u308c\u3066\u3044\u308b\u307e\u307e\u306e\u3082\u306e\u3067\u3059\uff09\u3002\u3042\u308b\u672c\u306e\u4e2d\u306e\u3069\u306e\u30da\u30fc\u30b8\u306b\u633f\u7d75\u304c\u542b\u307e\u308c\u3066\u3044\u308b\u304b\u306e\u30ea\u30b9\u30c8\u3092\u53d6\u5f97\u3057\u305f\u5f8c\u3001HT\u3068IA\u306e\u30c0\u30a6\u30f3\u30ed\u30fc\u30c9\u6a5f\u80fd\u306f\u3053\u308c\u3089\u306e\u30da\u30fc\u30b8\u3092JPEG\uff08\u30d5\u30a1\u30a4\u30eb\u540d\u306f\u30da\u30fc\u30b8\u756a\u53f7\uff09\u3068\u3057\u3066\u30c0\u30a6\u30f3\u30ed\u30fc\u30c9\u3057\u3066\u3001\u305d\u308c\u3089\u3092\u30b5\u30d6\u30c7\u30a3\u30ec\u30af\u30c8\u30ea\uff08\u540d\u524d\u306f\u8cc7\u6599ID\uff09\u306b\u4fdd\u5b58\u3057\u307e\u3059\u3002\u3082\u3061\u308d\u3093\u3001\u7570\u306a\u308b\u66f8\u7c4d\u30ea\u30b9\u30c8\u3092\u7528\u3044\u305f\u308a\u3001\u51fa\u529b\u5148<font style=\"color:#ff0000\" class=\"has-text-color\">out_dir<\/font>\u3092<font style=\"color:#ff0000\" class=\"has-text-color\">items<\/font>\u4ee5\u5916\u306e\u3082\u306e\u306b\u5909\u3048\u305f\u308a\u3059\u308b\u3053\u3068\u3082\u53ef\u80fd\u3067\u3059\u3002<\/p>\n\n\n\n<pre class=\"wp-block-preformatted has-background has-very-light-gray-background-color\"> items\/\n \u251c\u2500\u2500 hathitrust\n \u2502&nbsp;&nbsp; \u251c\u2500\u2500 hvd.32044021161005\n \u2502&nbsp;&nbsp; \u2502&nbsp;&nbsp; \u251c\u2500\u2500 103.jpg\n \u2502&nbsp;&nbsp; \u2502&nbsp;&nbsp; \u2514\u2500\u2500 ...\n \u2502&nbsp;&nbsp; \u2514\u2500\u2500 osu.32435078698222\n \u2502&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; \u251c\u2500\u2500 100.jpg\n 
\u2502&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; \u251c\u2500\u2500 ...\n \u2514\u2500\u2500 internetarchive\n &nbsp;&nbsp;&nbsp; \u2514\u2500\u2500 talespeterparle00goodgoog\n &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; \u251c\u2500\u2500 103.jpg\n &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; \u2514\u2500\u2500 ...\n &nbsp;\n 5 directories, 113 files <\/pre>\n\n\n\n<p>\u30c0\u30a6\u30f3\u30ed\u30fc\u30c9\u6a5f\u80fd\u306f\u5fc5\u8981\u306a\u51e6\u7406\u3060\u3051\u3092\u884c\u3046\uff08lazy\uff09\u3082\u306e\u3067\u3001<font style=\"color:#ff0000\" class=\"has-text-color\">items<\/font>\u30c7\u30a3\u30ec\u30af\u30c8\u30ea\u304c\u4e0a\u8a18\u306e\u3088\u3046\u306a\u72b6\u614b\u3067\u30ce\u30fc\u30c8\u30d6\u30c3\u30af\u3092\u518d\u5ea6\u5b9f\u884c\u3059\u308b\u3068\u3001\u3059\u3067\u306b\u81ea\u5206\u306e\u30b5\u30d6\u30d5\u30a9\u30eb\u30c0\u3092\u6301\u3063\u3066\u3044\u308b\u30a2\u30a4\u30c6\u30e0\u306f\u3059\u3079\u3066\u30b9\u30ad\u30c3\u30d7\u3055\u308c\u307e\u3059\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"anaconda-optional\">Anaconda\uff08\u30aa\u30d7\u30b7\u30e7\u30f3\uff09<\/h3>\n\n\n\n<p>Anaconda\u3068\u306f\u3001\u79d1\u5b66\u8a08\u7b97\u7528\u306ePython\u30c7\u30a3\u30b9\u30c8\u30ea\u30d3\u30e5\u30fc\u30b7\u30e7\u30f3\u3068\u3057\u3066\u4e3b\u8981\u306a\u3082\u306e\u3067\u3059\u3002\u3053\u306e\u30d1\u30c3\u30b1\u30fc\u30b8\u30de\u30cd\u30fc\u30b8\u30e3<font style=\"color:#ff0000\" class=\"has-text-color\">conda<\/font>\u3067\u3001<font style=\"color:#ff0000\" class=\"has-text-color\">numpy<\/font>\u3084<font style=\"color:#ff0000\" 
class=\"has-text-color\">tensorflow<\/font>\u306a\u3069\u306e\u30e9\u30a4\u30d6\u30e9\u30ea\u3092\u7c21\u5358\u306b\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u3059\u308b\u3053\u3068\u304c\u53ef\u80fd\u306b\u306a\u308a\u307e\u3059\u3002\u300cMiniconda\u300d\u30d0\u30fc\u30b8\u30e7\u30f3\u306b\u306f\u4f59\u8a08\u306a\u30d1\u30c3\u30b1\u30fc\u30b8\u304c\u30d7\u30ea\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u3055\u308c\u3066\u3044\u306a\u3044\u306e\u3067\u3001\u57fa\u672c\u74b0\u5883\u3092\u30af\u30ea\u30fc\u30f3\u306b\u4fdd\u3061\u3001\u30d7\u30ed\u30b8\u30a7\u30af\u30c8\u306b\u5fc5\u8981\u306a\u3082\u306e\u3060\u3051\u3092\u540d\u524d\u306e\u3064\u3044\u305f\u74b0\u5883\u306b\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u3059\u308b\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002<\/p>\n\n\n\n<p><a href=\"https:\/\/conda.io\/miniconda.html\" target=\"_blank\" rel=\"noreferrer noopener\">Miniconda<\/a>\u3092\u30c0\u30a6\u30f3\u30ed\u30fc\u30c9\u3057\u3066\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u3057\u307e\u3059\u3002Python\n3\u306e\u6700\u65b0\u306e\u5b89\u5b9a\u7248\u3092\u9078\u629e\u3057\u307e\u3059\u3002\u3059\u3079\u3066\u304c\u3046\u307e\u304f\u3044\u3051\u3070\u3001\u30b7\u30a7\u30eb\u3067<font style=\"color:#ff0000\" class=\"has-text-color\">which conda<\/font> (Linux\/macOS\u306e\u5834\u5408)\u3001\u307e\u305f\u306f\u3001<font style=\"color:#ff0000\" class=\"has-text-color\">where conda<\/font> (Windows\u306e\u5834\u5408)\u3092\u5b9f\u884c\u3059\u308b\u3068\u3001\u51fa\u529b\u3067\u5b9f\u884c\u53ef\u80fd\u306a\u30d7\u30ed\u30b0\u30e9\u30e0\u306e\u5834\u6240\u3092\u78ba\u8a8d\u3067\u304d\u308b\u306f\u305a\u3067\u3059\u3002<\/p>\n\n\n\n<p>Anaconda\u306b\u306f\u3088\u304f\u4f7f\u308f\u308c\u308b\u30b3\u30de\u30f3\u30c9\u306e\u4fbf\u5229\u306a<a href=\"https:\/\/web.archive.org\/web\/20190115051900\/https:\/\/conda.io\/docs\/_downloads\/conda-cheatsheet.pdf\">\u30c1\u30fc\u30c8\u30b7\u30fc\u30c8<\/a>\u304c\u3042\u308a\u307e\u3059\u3002<\/p>\n\n\n\n<h4 class=\"wp-block-heading\" 
id=\"create-an-environment\">\u25b6\ufe0e\u74b0\u5883\u8a2d\u5b9a<\/h4>\n\n\n\n<p>\u74b0\u5883\u306f\u3001\u8907\u6570\u306e\u30d1\u30c3\u30b1\u30fc\u30b8\u30de\u30cd\u30fc\u30b8\u30e3\u3092\u540c\u6642\u306b\u4f7f\u7528\u3059\u308b\u969b\u306e\u8907\u96d1\u3055\u3092\u30b3\u30f3\u30c8\u30ed\u30fc\u30eb\u3059\u308b\u306e\u306b\u7279\u306b\u5f79\u7acb\u3061\u307e\u3059\u3002\u3059\u3079\u3066\u306ePython\u30e9\u30a4\u30d6\u30e9\u30ea\u304c<font style=\"color:#ff0000\" class=\"has-text-color\">conda<\/font>\u3092\u901a\u3058\u3066\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u3067\u304d\u308b\u308f\u3051\u3067\u306f\u3042\u308a\u307e\u305b\u3093\u3002\u3044\u304f\u3064\u304b\u306e\u30b1\u30fc\u30b9\u3067\u306f\u3001Python\u306e\u6a19\u6e96\u30d1\u30c3\u30b1\u30fc\u30b8\u30de\u30cd\u30fc\u30b8\u30e3\u3067\u3042\u308b<font style=\"color:#ff0000\" class=\"has-text-color\">pip<\/font>\uff08\u3042\u308b\u3044\u306f<font style=\"color:#ff0000\" class=\"has-text-color\">pipenv<\/font>\u306e\u3088\u3046\u306a\u4ee3\u66ff\u54c1\uff09\u3092\u983c\u308b\u3053\u3068\u306b\u306a\u308b\u3067\u3057\u3087\u3046\u3002\u3057\u304b\u3057\u3001\u305d\u3046\u3059\u308b\u5834\u5408\u306f\u3001<font style=\"color:#ff0000\" class=\"has-text-color\">conda<\/font>\u306b\u3088\u308a\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u3055\u308c\u305f<font style=\"color:#ff0000\" class=\"has-text-color\">pip<\/font>\u3092\u4f7f\u7528\u3057\u307e\u3059\u3002\u3053\u308c\u306b\u3088\u308a\u3001\u30d7\u30ed\u30b8\u30a7\u30af\u30c8\u306b\u5fc5\u8981\u306a\u3059\u3079\u3066\u306e\u30d1\u30c3\u30b1\u30fc\u30b8\u3092\u540c\u3058\u4eee\u60f3\u30b5\u30f3\u30c9\u30dc\u30c3\u30af\u30b9\u306b\u5165\u308c\u3066\u304a\u304f\u3053\u3068\u304c\u51fa\u6765\u307e\u3059\u3002<\/p>\n\n\n\n<pre class=\"wp-block-preformatted has-background has-very-light-gray-background-color\"> <em># 
\u3042\u306a\u305f\u306e\u73fe\u5728\u306e\u74b0\u5883\u306f\u3001\u524d\u306e\u30a2\u30b9\u30bf\u30ea\u30b9\u30af\u3067\u793a\u3055\u308c\u3066\u3044\u307e\u3059\u3002<\/em>\n <em># (\u65b0\u3057\u3044\u30b7\u30a7\u30eb\u3067\u306f \"base\" \u306b\u306a\u308a\u307e\u3059)<\/em>\n conda env list\n &nbsp;\n <em># \u73fe\u5728\u306e\u74b0\u5883\u306b\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u3055\u308c\u3066\u3044\u308b\u30d1\u30c3\u30b1\u30fc\u30b8<\/em>\n conda list <\/pre>\n\n\n\n<p>\u6b21\u306b\u3001\u540d\u524d\u306e\u4ed8\u3044\u305f\u74b0\u5883\u3092\u4f5c\u6210\u3057\u3001Python\n3\u3092\u4f7f\u3046\u3088\u3046\u306b\u8a2d\u5b9a\u3057\u3066\u3001\u958b\u59cb\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<pre class=\"wp-block-preformatted has-background has-very-light-gray-background-color\"> <em># --name\u30d5\u30e9\u30b0\u306f\u6587\u5b57\u5217\u306e\u5f15\u6570\u3092\u53d6\u308b\u3053\u3068 (\u4f8b: \"extract-pages\")\u3068<\/em>\n <em># Python \u306e\u30d0\u30fc\u30b8\u30e7\u30f3\u3092\u6307\u5b9a\u3059\u308b\u305f\u3081\u306e\u69cb\u6587\u306b\u6ce8\u610f\u3057\u3066\u304f\u3060\u3055\u3044<\/em>\n conda create --name extract-pages python<strong>=<\/strong>3\n &nbsp;\n <em># \u65b0\u3057\u3044\u74b0\u5883\u306b\u5165\u308b (macOS\/Linux)<\/em>\n source activate extract-pages <\/pre>\n\n\n\n<pre class=\"wp-block-preformatted has-background has-very-light-gray-background-color\">  <em># \u74b0\u5883\u3092\u8d77\u52d5\u3059\u308b\u305f\u3081\u306eWindows\u30b3\u30de\u30f3\u30c9\u304c\u82e5\u5e72\u7570\u306a\u308a\u307e\u3059<\/em>\n conda activate extract-pages <\/pre>\n\n\n\n<p>\u74b0\u5883\u304b\u3089\u7d42\u4e86\u3059\u308b\u306b\u306f\u3001macOS\/Linux\u3067\u306f<font style=\"color:#ff0000\" class=\"has-text-color\">source deactivate<\/font>\u3092\u3001Windows\u3067\u306f<font style=\"color:#ff0000\" class=\"has-text-color\">deactivate<\/font>\u3092\u5b9f\u884c\u3057\u3066\u304f\u3060\u3055\u3044\u3002\u305f\u3060\u3057\u3001\u30ec\u30c3\u30b9\u30f3\u4e2d\u306f<font style=\"color:#ff0000\" 
class=\"has-text-color\">extract-pages<\/font>\u74b0\u5883\u306b\u3044\u308b\u3053\u3068\u3092\u78ba\u8a8d\u3057\u3066\u304f\u3060\u3055\u3044\uff01<\/p>\n\n\n\n<h4 class=\"wp-block-heading\" id=\"install-conda-packages\">\u25b6\ufe0eConda\u30d1\u30c3\u30b1\u30fc\u30b8\u306e\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb<\/h4>\n\n\n\n<p>\u6700\u521d\u306e\u3044\u304f\u3064\u304b\u306e\u30d1\u30c3\u30b1\u30fc\u30b8\u306f<font style=\"color:#ff0000\" class=\"has-text-color\">conda<\/font>\u3092\u4f7f\u3063\u3066\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u3067\u304d\u307e\u3059\u3002\u4ed6\u306e\u5fc5\u8981\u306a\u30d1\u30c3\u30b1\u30fc\u30b8\uff08gzip\u3001json\u3001os\u3001sys\u3001time\uff09\u306f<a href=\"https:\/\/docs.python.org\/3\/library\/\">Python\u6a19\u6e96\u30e9\u30a4\u30d6\u30e9\u30ea<\/a>\u306e\u4e00\u90e8\u3067\u3059\u3002\u30c1\u30e3\u30f3\u30cd\u30eb\u3092\u6307\u5b9a\u3059\u308b\u5fc5\u8981\u304c\u3042\u308b\u3053\u3068\u306b\u6ce8\u610f\u3057\u3066\u304f\u3060\u3055\u3044\u3002<a href=\"https:\/\/anaconda.org\/\">Anaconda Cloud<\/a>\u3067\u30d1\u30c3\u30b1\u30fc\u30b8\u3092\u691c\u7d22\u3059\u308b\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002<\/p>\n\n\n\n<pre class=\"wp-block-preformatted has-background has-very-light-gray-background-color\">  <em># \u30ed\u30fc\u30ab\u30eb\u30d0\u30fc\u30b8\u30e7\u30f3\u306e pip \u304c\u3042\u308b\u3053\u3068\u3092\u78ba\u8a8d\u3057\u307e\u3059 (\u4ee5\u4e0b\u306e\u8aac\u660e\u3092\u53c2\u7167\u3057\u3066\u304f\u3060\u3055\u3044)<\/em>\n conda install pip\n &nbsp;\n conda install jupyter\n &nbsp;\n conda install --channel anaconda requests <\/pre>\n\n\n\n<p>Jupyter\u306b\u306f\u591a\u304f\u306e\u4f9d\u5b58\u95a2\u4fc2\uff08\u5fc5\u8981\u3068\u3059\u308b\u4ed6\u306e\u30d1\u30c3\u30b1\u30fc\u30b8\uff09\u304c\u3042\u308b\u305f\u3081\u3001\u3053\u306e\u30b9\u30c6\u30c3\u30d7\u306b\u306f\u6570\u5206\u304b\u304b\u308b\u5834\u5408\u304c\u3042\u308a\u307e\u3059\u3002<font style=\"color:#ff0000\" 
class=\"has-text-color\">conda<\/font>\u304c<font style=\"color:#ff0000\" class=\"has-text-color\">Proceed ([y]\/n)?<\/font>\u3068\u3044\u3046\u8868\u793a\u3092\u3060\u3057\u305f\u3089\u3001<font style=\"color:#ff0000\" class=\"has-text-color\">y<\/font>\u307e\u305f\u306f<font style=\"color:#ff0000\" class=\"has-text-color\">yes<\/font>\u3068\u5165\u529b\u3057\u3066\u304b\u3089Enter\u30ad\u30fc\u3092\u62bc\u3057\u3066\u3001\u30d1\u30c3\u30b1\u30fc\u30b8\u30d7\u30e9\u30f3\u3092\u53d7\u3051\u5165\u308c\u308b\u3053\u3068\u3092\u899a\u3048\u3066\u304a\u3044\u3066\u304f\u3060\u3055\u3044\u3002<\/p>\n\n\n\n<p style=\"background-color:#faf2cc;color:#8a6d3b\" class=\"has-text-color has-background\">\u88cf\u3067\u306f\u3001conda\u306f\u5fc5\u8981\u306a\u30d1\u30c3\u30b1\u30fc\u30b8\u3068\u4f9d\u5b58\u95a2\u4fc2\u304c\u3059\u3079\u3066\u4e92\u63db\u6027\u306e\u3042\u308b\u65b9\u6cd5\u3067\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u3055\u308c\u308b\u3053\u3068\u3092\u78ba\u8a8d\u3059\u308b\u305f\u3081\u306b\u52d5\u3044\u3066\u3044\u307e\u3059\u3002<\/p>\n\n\n\n<h4 class=\"wp-block-heading\" id=\"install-pip-packages\">\u25b6\ufe0ePip\u30d1\u30c3\u30b1\u30fc\u30b8\u306e\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb<\/h4>\n\n\n\n<p><font style=\"color:#ff0000\" class=\"has-text-color\">conda<\/font>\u74b0\u5883\u3092\u4f7f\u7528\u3057\u3066\u3044\u308b\u5834\u5408\u306f\u3001\u30ed\u30fc\u30ab\u30eb\u30d0\u30fc\u30b8\u30e7\u30f3\u306e<font style=\"color:#ff0000\" class=\"has-text-color\">pip<\/font>\u3092\u4f7f\u7528\u3059\u308b\u306e\u304c\u30d9\u30b9\u30c8\u3067\u3059\u3002\u4ee5\u4e0b\u306e\u30b3\u30de\u30f3\u30c9\u3067\u3001<font style=\"color:#ff0000\" class=\"has-text-color\">\/Miniconda\/envs\/extract-pages\/Scripts\/pip<\/font>\u306e\u3088\u3046\u306a\u7d76\u5bfe\u30d1\u30b9\u3092\u542b\u3080\u30d7\u30ed\u30b0\u30e9\u30e0\u3092\u51fa\u529b\u3057\u3066\u3044\u308b\u3053\u3068\u3092\u78ba\u8a8d\u3057\u3066\u304f\u3060\u3055\u3044\u3002<\/p>\n\n\n\n<p class=\"has-background 
has-very-light-gray-background-color\">which pip<\/p>\n\n\n\n<pre class=\"wp-block-preformatted has-background has-very-light-gray-background-color\">  <em># Windows\u3067\u306f\"which\"\u3068\u540c\u7b49<\/em>\n where pip <\/pre>\n\n\n\n<p>\u4e0a\u306e\u51fa\u529b\u306b2\u3064\u306e\u30d0\u30fc\u30b8\u30e7\u30f3\u306e<font style=\"color:#ff0000\" class=\"has-text-color\">pip<\/font>\u304c\u3042\u308b\u5834\u5408\u3001API\u30e9\u30c3\u30d1\u30fc\u30e9\u30a4\u30d6\u30e9\u30ea\u3092\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u3059\u308b\u969b\u306b\u300c\u30ed\u30fc\u30ab\u30eb\u300d\u74b0\u5883\u306e\u30d0\u30fc\u30b8\u30e7\u30f3\u3078\u306e\u30d5\u30eb\u30d1\u30b9\u3092\u5165\u529b\u3057\u3066\u304f\u3060\u3055\u3044\u3002<\/p>\n\n\n\n<pre class=\"wp-block-preformatted has-background has-very-light-gray-background-color\"> pip install hathitrust-api\n pip install internetarchive <\/pre>\n\n\n\n<pre class=\"wp-block-preformatted has-background has-very-light-gray-background-color\"> <em># *\u30ed\u30fc\u30ab\u30eb\u306e*pip\u5b9f\u884c\u30d5\u30a1\u30a4\u30eb\u3078\u306e\u7d76\u5bfe\u30d1\u30b9\u3092\u4f7f\u7528\u3057\u305fWindows\u306e\u4f8b<\/em>\n C:<font style=\"color:#ff0000\" class=\"has-text-color\">\\U<\/font>sers<font style=\"color:#ff0000\" class=\"has-text-color\">\\s<\/font>tephen-krewson<font style=\"color:#ff0000\" class=\"has-text-color\">\\M<\/font>iniconda<font style=\"color:#ff0000\" class=\"has-text-color\">\\e<\/font>nvs<font style=\"color:#ff0000\" class=\"has-text-color\">\\e<\/font>xtract-pages<font style=\"color:#ff0000\" class=\"has-text-color\">\\S<\/font>cripts<font style=\"color:#ff0000\" class=\"has-text-color\">\\p<\/font>ip.exe install hathitrust-api internetarchive <\/pre>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"jupyter-notebooks\">Jupyter Notebook<\/h3>\n\n\n\n<p>Peter\nOrganisciak\u3068Boris\nCapitanu\u306e\u300cHTRC 
Feature\nReader\u3092\u4f7f\u3063\u305fPython\u3067\u306e\u30c6\u30ad\u30b9\u30c8\u30de\u30a4\u30cb\u30f3\u30b0\u300d\uff08<a href=\"https:\/\/programminghistorian.org\/en\/lessons\/text-mining-with-extracted-features#start-a-notebook\">Text\nMining in Python through the HTRC Feature Reader<\/a>\uff09\u306f\u3001\u958b\u767a\u3084\u30c7\u30fc\u30bf\u63a2\u7d22\u306e\u305f\u3081\u306e\u30ce\u30fc\u30c8\u30d6\u30c3\u30af\u306e\u5229\u70b9\u3092\u8aac\u660e\u3057\u3066\u3044\u307e\u3059\u3002\u307e\u305f\u3001\u3053\u306e\u30da\u30fc\u30b8\u306b\u306f\u3001\u30bb\u30eb\u3092\u52b9\u679c\u7684\u306b\u5b9f\u884c\u3059\u308b\u65b9\u6cd5\u306b\u3064\u3044\u3066\u3082\u6709\u76ca\u306a\u60c5\u5831\u304c\u63b2\u8f09\u3055\u308c\u3066\u3044\u307e\u3059\u3002\u79c1\u305f\u3061\u306f\u3001Anaconda\u306e\u6700\u5c0f\u9650\u306e\u30d0\u30fc\u30b8\u30e7\u30f3\u3092\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u3057\u305f\u306e\u3067\u3001\u30b3\u30de\u30f3\u30c9\u30e9\u30a4\u30f3\u304b\u3089Jupyter\u3092\u8d77\u52d5\u3059\u308b\u5fc5\u8981\u304c\u3042\u308a\u307e\u3059\u3002\uff08\u30ec\u30c3\u30b9\u30f3\u30d5\u30a1\u30a4\u30eb\u3092\u542b\u3080\u30d5\u30a9\u30eb\u30c0\u5185\u304b\u3089\uff09\u30b7\u30a7\u30eb\u3067<font style=\"color:#ff0000\" class=\"has-text-color\">jupyter\nnotebook<\/font>\u3092\u5b9f\u884c\u3057\u3066\u304f\u3060\u3055\u3044\u3002<\/p>\n\n\n\n<p>\u4e0b\u306e\u753b\u9762\u306f\u3001\u30b7\u30a7\u30eb\u3067\u30ce\u30fc\u30c8\u30d6\u30c3\u30af\u30b5\u30fc\u30d0\u3092\u5b9f\u884c\u3057\u3001Jupyter\n\u30db\u30fc\u30e0\u30da\u30fc\u30b8\u3067\u30c7\u30d5\u30a9\u30eb\u30c8\u306e\u30d6\u30e9\u30a6\u30b6\u3092\u8d77\u52d5\u3057\u305f\u3068\u3053\u308d\u3067\u3059\u3002\u30db\u30fc\u30e0\u30da\u30fc\u30b8\u306b\u306f\u3001\u73fe\u5728\u306e\u4f5c\u696d\u30c7\u30a3\u30ec\u30af\u30c8\u30ea\u306b\u3042\u308b\u3059\u3079\u3066\u306e\u30d5\u30a1\u30a4\u30eb\u304c\u8868\u793a\u3055\u308c\u307e\u3059\u3002<\/p>\n\n\n\n<div class=\"wp-block-image\"><figure class=\"aligncenter\"><img decoding=\"async\" 
loading=\"lazy\" width=\"1197\" height=\"408\" src=\"https:\/\/www.dh.ku-orcas.kansai-u.ac.jp\/wp-content\/uploads\/jupyter-home.png\" alt=\"\" class=\"wp-image-444\"\/><\/figure><\/div>\n\n\n\n<p style=\"text-align:center\"><em>\u30ec\u30c3\u30b9\u30f3\u30d5\u30a1\u30a4\u30eb\u3092\u8868\u793a\u3059\u308bJupyter\u30db\u30fc\u30e0\u30da\u30fc\u30b8<\/em><\/p>\n\n\n\n<p style=\"background-color:#faf2cc;color:#8a6d3b\" class=\"has-text-color has-background\">\u30b7\u30a7\u30eb\u3067\u3001\u89e3\u51cd\u3055\u308c\u305flesson-files&nbsp;\u30c7\u30a3\u30ec\u30af\u30c8\u30ea\u306bcd\u3067\u79fb\u52d5\u3057\u3066\u3044\u308b\u3053\u3068\u3092\u78ba\u8a8d\u3059\u308b\u3053\u3068\u3002<\/p>\n\n\n\n<p><font style=\"color:#ff0000\" class=\"has-text-color\">hathitrust.ipynb<\/font>\u3068<font style=\"color:#ff0000\" class=\"has-text-color\">internetarchive.ipynb<\/font>\u306e\u30ce\u30fc\u30c8\u30d6\u30c3\u30af\u3092\u30af\u30ea\u30c3\u30af\u3057\u3066\u3001\u65b0\u3057\u3044\u30d6\u30e9\u30a6\u30b6\u30bf\u30d6\u3092\u958b\u304d\u307e\u3059\u3002\u3053\u3053\u304b\u3089\u306f\u3001\u30b7\u30a7\u30eb\u306e\u30b3\u30de\u30f3\u30c9\u3092\u5b9f\u884c\u3059\u308b\u5fc5\u8981\u306f\u3042\u308a\u307e\u305b\u3093\u3002\u30ce\u30fc\u30c8\u30d6\u30c3\u30af\u306e\u304a\u304b\u3052\u3067\u3001Python\u306e\u30b3\u30fc\u30c9\u3092\u5b9f\u884c\u3067\u304d\u3001\u30b3\u30f3\u30d4\u30e5\u30fc\u30bf\u306e\u30d5\u30a1\u30a4\u30eb\u30b7\u30b9\u30c6\u30e0\u3078\u30d5\u30eb\u30a2\u30af\u30bb\u30b9\u304c\u3067\u304d\u308b\u3088\u3046\u306b\u306a\u308a\u307e\u3057\u305f\u3002\u7d42\u4e86\u3057\u305f\u3089\u3001Jupyter\u30db\u30fc\u30e0\u30da\u30fc\u30b8\u306e\u300cQuit\u300d\u3092\u30af\u30ea\u30c3\u30af\u3059\u308b\u304b\u3001\u30b7\u30a7\u30eb\u3067<font style=\"color:#ff0000\" 
class=\"has-text-color\">ctrl+c<\/font>\u3092\u5b9f\u884c\u3059\u308b\u3053\u3068\u3067\u3001\u30ce\u30fc\u30c8\u30d6\u30c3\u30af\u30b5\u30fc\u30d0\u3092\u505c\u6b62\u3059\u308b\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002<\/p>\n\n\n\n<h2 class=\"wp-block-heading\" id=\"hathitrust\">HathiTrust<\/h2>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"api-access\">API\u30a2\u30af\u30bb\u30b9<\/h3>\n\n\n\n<p>\u30c7\u30fc\u30bfAPI\u3092\u4f7f\u3046\u524d\u306b\u3001HathiTrust\u306b\u767b\u9332\u3059\u308b\u5fc5\u8981\u304c\u3042\u308a\u307e\u3059\u3002<a href=\"https:\/\/babel.hathitrust.org\/cgi\/kgs\/request\">\u767b\u9332\u30dd\u30fc\u30bf\u30eb<\/a>\u3067\u3001\u540d\u524d\u3001\u7d44\u7e54\u540d\u3001\u30e1\u30fc\u30eb\u30a2\u30c9\u30ec\u30b9\u3092\u5165\u529b\u3057\u3066\u30a2\u30af\u30bb\u30b9\u30ad\u30fc\u3092\u30ea\u30af\u30a8\u30b9\u30c8\u3057\u3066\u304f\u3060\u3055\u3044\u3002\u6570\u5206\u3067\u30e1\u30fc\u30eb\u306e\u8fd4\u4fe1\u304c\u5c4a\u304d\u307e\u3059\u3002\u30ea\u30f3\u30af\u3092\u30af\u30ea\u30c3\u30af\u3059\u308b\u3068\u30012\u3064\u306e\u30ad\u30fc\u304c\u8868\u793a\u3055\u308c\u305f\u30ef\u30f3\u30bf\u30a4\u30e0\u306e\u30da\u30fc\u30b8\u306b\u30a2\u30af\u30bb\u30b9\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<p><font style=\"color:#ff0000\" class=\"has-text-color\">hathitrust.ipynb<\/font>\u30ce\u30fc\u30c8\u30d6\u30c3\u30af\u3067\u3001\u6700\u521d\u306e\u30bb\u30eb\uff08\u4e0b\u8a18\uff09\u3092\u8abf\u3079\u3066\u304f\u3060\u3055\u3044\u3002\u6307\u793a\u901a\u308a\u306bAPI\u30c8\u30fc\u30af\u30f3\u3092\u5165\u529b\u3057\u307e\u3059\u3002\u305d\u3057\u3066\u3001\u30ce\u30fc\u30c8\u30d6\u30c3\u30af\u306e\u30ca\u30d3\u30b2\u30fc\u30b7\u30e7\u30f3\u30d0\u30fc\u3067\u300cRun\u300d\u3092\u30af\u30ea\u30c3\u30af\u3059\u308b\u3053\u3068\u3067\u30bb\u30eb\u3092\u5b9f\u884c\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<pre class=\"wp-block-preformatted has-background has-very-light-gray-background-color\"> <em># HT Data 
API\u30e9\u30c3\u30d1\u30fc\u306e\u30a4\u30f3\u30dd\u30fc\u30c8<\/em>\n <strong>from<\/strong> hathitrust_api <strong>import<\/strong> DataAPI\n &nbsp;\n <em># \u30d7\u30ec\u30fc\u30b9\u30db\u30eb\u30c0\u306e\u6587\u5b57\u5217\u3092HT\u306e\u8cc7\u683c\u60c5\u5831\u306b\u7f6e\u304d\u63db\u3048\u3066\u304f\u3060\u3055\u3044(\u5f15\u7528\u7b26\u306f\u6b8b\u3057\u3066\u304f\u3060\u3055\u3044)<\/em>\n ht_access_key <strong>=<\/strong> \"YOUR_ACCESS_KEY_HERE\"\n ht_secret_key <strong>=<\/strong> \"YOUR_SECRET_KEY_HERE\"\n &nbsp;\n <em># Data API \u63a5\u7d9a\u30aa\u30d6\u30b8\u30a7\u30af\u30c8\u306e\u30a4\u30f3\u30b9\u30bf\u30f3\u30b9\u3092\u4f5c\u6210\u3057\u307e\u3059\u3002<\/em>\n data_api <strong>=<\/strong> DataAPI(ht_access_key, ht_secret_key) <\/pre>\n\n\n\n<p style=\"background-color:#faf2cc;color:#8a6d3b\" class=\"has-text-color has-background\">\u6ce8\u610f\uff01\u30a2\u30af\u30bb\u30b9\u30c8\u30fc\u30af\u30f3\u3092\nGitHub (\u307e\u305f\u306f\u4ed6\u306e\u30d0\u30fc\u30b8\u30e7\u30f3\u7ba1\u7406\u30db\u30b9\u30c8)\n\u306e\u516c\u958b\u30ec\u30dd\u3067\u516c\u958b\u3057\u306a\u3044\u3067\u304f\u3060\u3055\u3044\u3002\u8ab0\u3067\u3082\u691c\u7d22\u3067\u304d\u308b\u3088\u3046\u306b\u306a\u3063\u3066\u3057\u307e\u3044\u307e\u3059\u3002Python\n\u30d7\u30ed\u30b8\u30a7\u30af\u30c8\u3067\u306e\u826f\u3044\u65b9\u6cd5\u306f\u3001\u30c8\u30fc\u30af\u30f3\u3092\u74b0\u5883\u5909\u6570\u3068\u3057\u3066\u4fdd\u5b58\u3059\u308b\u304b\u3001\u30d0\u30fc\u30b8\u30e7\u30f3\u7ba1\u7406\u3055\u308c\u3066\u3044\u306a\u3044\u30d5\u30a1\u30a4\u30eb\u306b\u4fdd\u5b58\u3059\u308b\u3053\u3068\u3067\u3059\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\" 
id=\"create-volume-list\">\u8cc7\u6599\u30ea\u30b9\u30c8\u306e\u4f5c\u6210<\/h3>\n\n\n\n<p>HT\u3067\u306f\u3001\u30ed\u30b0\u30a4\u30f3\u3057\u3066\u3044\u306a\u304f\u3066\u3082\u3001\u8ab0\u3067\u3082\u8cc7\u6599\u30b3\u30ec\u30af\u30b7\u30e7\u30f3\u3092\u4f5c\u308b\u3053\u3068\u304c\u3067\u304d\u307e\u3059\uff01\u305f\u3060\u3057\u3001\u8cc7\u6599\u30ea\u30b9\u30c8\u3092\u4fdd\u5b58\u3057\u305f\u3044\u5834\u5408\u306f\u3001\u30a2\u30ab\u30a6\u30f3\u30c8\u767b\u9332\u3092\u3059\u308b\u5fc5\u8981\u304c\u3042\u308a\u307e\u3059\u3002<a href=\"https:\/\/babel.hathitrust.org\/cgi\/mb?colltype=updated\">\u6307\u793a<\/a>\u306b\u5f93\u3063\u3066\u3001\u3044\u304f\u3064\u304b\u5168\u6587\u691c\u7d22\u3092\u884c\u3044\u3001\u9078\u629e\u3057\u305f\u7d50\u679c\u3092\u30b3\u30ec\u30af\u30b7\u30e7\u30f3\u306b\u8ffd\u52a0\u3057\u307e\u3059\u3002\u73fe\u5728\u3001HathiTrust\u3067\u306f\u3001\u30d7\u30ed\u30b0\u30e9\u30e0\u3067\u66f8\u7c4d\u3092\u53d6\u5f97\u3059\u308b\u305f\u3081\u306e\u691c\u7d22\u7528API\u304c\u516c\u958b\u3055\u308c\u3066\u3044\u306a\u3044\u305f\u3081\u3001\u30a6\u30a7\u30d6\u30a4\u30f3\u30bf\u30fc\u30d5\u30a7\u30a4\u30b9\u3067\u691c\u7d22\u3059\u308b\u5fc5\u8981\u304c\u3042\u308a\u307e\u3059\u3002<\/p>\n\n\n\n<p>\u30b3\u30ec\u30af\u30b7\u30e7\u30f3\u3092\u66f4\u65b0\u3059\u308b\u3068\u3001HT\u306f\u30b3\u30ec\u30af\u30b7\u30e7\u30f3\u5185\u306e\u5404\u8cc7\u6599\u306b\u95a2\u3059\u308b\u30e1\u30bf\u30c7\u30fc\u30bf\u3092\u8ffd\u8de1\u3057\u307e\u3059\u3002\u30ec\u30c3\u30b9\u30f3\u30d5\u30a1\u30a4\u30eb\u306b\u306f\u3001JSON\u5f62\u5f0f\u306e\u30b5\u30f3\u30d7\u30eb\u30ec\u30c3\u30b9\u30f3\u306e\u30e1\u30bf\u30c7\u30fc\u30bf\u304c\u542b\u307e\u308c\u3066\u3044\u307e\u3059\u3002\u3082\u3057\u3042\u306a\u305f\u81ea\u8eab\u306eHT\u30b3\u30ec\u30af\u30b7\u30e7\u30f3\u306e\u30d5\u30a1\u30a4\u30eb\u3092\u4f7f\u3044\u305f\u3044\u5834\u5408\u306f\u3001\u30b3\u30ec\u30af\u30b7\u30e7\u30f3\u30da\u30fc\u30b8\u306b\u79fb\u52d5\u3057\u3066\u3001\u5de6\u5074\u306b\u3042\u
308b\u30e1\u30bf\u30c7\u30fc\u30bf\u306e\u30ea\u30f3\u30af\u306e\u4e0a\u306b\u30ab\u30fc\u30bd\u30eb\u3092\u7f6e\u304f\u3068\u3001\u4e0b\u306e\u30b9\u30af\u30ea\u30fc\u30f3\u30b7\u30e7\u30c3\u30c8\u306b\u3042\u308b\u3088\u3046\u306bJSON\u5f62\u5f0f\u3067\u30c0\u30a6\u30f3\u30ed\u30fc\u30c9\u3059\u308b\u30aa\u30d7\u30b7\u30e7\u30f3\u304c\u8868\u793a\u3055\u308c\u307e\u3059\u3002<\/p>\n\n\n\n<div class=\"wp-block-image\"><figure class=\"aligncenter\"><img decoding=\"async\" loading=\"lazy\" width=\"1004\" height=\"574\" src=\"https:\/\/www.dh.ku-orcas.kansai-u.ac.jp\/wp-content\/uploads\/download-ht-json.png\" alt=\"\" class=\"wp-image-445\"\/><figcaption>  \u30b3\u30ec\u30af\u30b7\u30e7\u30f3\u30e1\u30bf\u30c7\u30fc\u30bf\u3092<em>JSON<\/em>\u5f62\u5f0f\u3067\u30c0\u30a6\u30f3\u30ed\u30fc\u30c9\u3059\u308b <\/figcaption><\/figure><\/div>\n\n\n\n<p>JSON\n\u30d5\u30a1\u30a4\u30eb\u3092\u30c0\u30a6\u30f3\u30ed\u30fc\u30c9\u3057\u305f\u3089\u3001Jupyter\nNotebook\u3092\u914d\u7f6e\u3057\u305f\u30c7\u30a3\u30ec\u30af\u30c8\u30ea\u306b\u79fb\u52d5\u3059\u308b\u3060\u3051\u3067\u3059\u3002HT\u30ce\u30fc\u30c8\u30d6\u30c3\u30af\u5185\u306eJSON\u30d5\u30a1\u30a4\u30eb\u306e\u540d\u524d\u3092\u3001\u30b3\u30ec\u30af\u30b7\u30e7\u30f3\u306e\u30d5\u30a1\u30a4\u30eb\u540d\u306b\u7f6e\u304d\u63db\u3048\u3066\u304f\u3060\u3055\u3044\u3002<\/p>\n\n\n\n<p>\u3053\u306e\u30ce\u30fc\u30c8\u30d6\u30c3\u30af\u3067\u306f\u3001\u30ea\u30b9\u30c8\u5185\u5305\u8868\u8a18\u3092\u4f7f\u3044\u3001\u30b3\u30ec\u30af\u30b7\u30e7\u30f3\u306e\u5168\u60c5\u5831\u3092\u542b\u3080<font style=\"color:#ff0000\" class=\"has-text-color\">gathers<\/font>\u30aa\u30d6\u30b8\u30a7\u30af\u30c8\u5185\u306b\u3042\u308b\u5168\u3066\u306e<font style=\"color:#ff0000\" class=\"has-text-color\">htitem_id<\/font>\u6587\u5b57\u5217\u3092\u5f97\u308b\u65b9\u6cd5\u3092\u793a\u3057\u3066\u3044\u307e\u3059\u3002<\/p>\n\n\n\n<pre class=\"wp-block-preformatted\"> # 
\u3053\u3053\u3067\u30b3\u30ec\u30af\u30b7\u30e7\u30f3\u306e\u30e1\u30bf\u30c7\u30fc\u30bf\u30d5\u30a1\u30a4\u30eb\u3092\u6307\u5b9a\u3059\u308b\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002\n metadata_path = \"554050894-1535834127.json\"\n &nbsp;\n with open(metadata_path, \"r\") as fp:\n &nbsp;&nbsp;&nbsp; data = json.load(fp)\n &nbsp;\n # \u30b3\u30ec\u30af\u30b7\u30e7\u30f3\u5185\u306e\u3059\u3079\u3066\u306e\u30e6\u30cb\u30fc\u30afID\u306e\u30ea\u30b9\u30c8\n vol_ids = [item['htitem_id'] for item in data['gathers']] <\/pre>\n\n\n\n<p style=\"background-color:#faf2cc;color:#8a6d3b\" class=\"has-text-color has-background\">\u4ed6\u306e\u30c1\u30e5\u30fc\u30c8\u30ea\u30a2\u30eb\u3067\u306f\u30011\u3064\u306e\u30a2\u30a4\u30c6\u30e0\u3092\u3069\u306e\u3088\u3046\u306b\u51e6\u7406\u3059\u308b\u304b\u3092\u793a\u3059\u3082\u306e\u304c\u3057\u3070\u3057\u3070\u3067\u3059\uff08\u30b5\u30a4\u30ba\u306f\u5c0f\u898f\u6a21\u306e\u3082\u306e\u3084\u8907\u96d1\u306a\u3082\u306e\u304c\u591a\u3044\u3067\u3059\uff09\u3002\u6559\u80b2\u7684\u306b\u306f\u4fbf\u5229\u3067\u3059\u304c\u3001\u3053\u308c\u3067\u306f\u8907\u6570\u306e\u30a2\u30a4\u30c6\u30e0\u306b\u30b3\u30fc\u30c9\u3092\u9069\u7528\u3059\u308b\u65b9\u6cd5\u304c\u8eab\u306b\u3064\u304d\u307e\u305b\u3093\u3002\u3053\u306e\u30ce\u30fc\u30c8\u30d6\u30c3\u30af\u3067\u306f\u3001\uff11\u3064\u306e\u30a2\u30a4\u30c6\u30e0\u306b\u9069\u7528\u3059\u308b\u5909\u63db\u65b9\u6cd5\u3092\u3001\u30a2\u30a4\u30c6\u30e0\u306e\u30b3\u30ec\u30af\u30b7\u30e7\u30f3\u306e\u30eb\u30fc\u30d7\u5185\u3067\u547c\u3073\u51fa\u3059\u3053\u3068\u304c\u3067\u304d\u308b\u300c\u95a2\u6570\u300d\u306b\u30ab\u30d7\u30bb\u30eb\u5316\u3059\u308b\u65b9\u6cd5\u3092\u898b\u3066\u3044\u304d\u307e\u3059\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\" 
id=\"visual-feature-image_on_page\">\u8996\u899a\u7684\u306a\u7279\u5fb4\uff1aIMAGE_ON_PAGE<\/h3>\n\n\n\n<p>\u8cc7\u6599\u30ea\u30b9\u30c8\u304c\u3042\u308b\u3068\u3001\u30da\u30fc\u30b8\u30ec\u30d9\u30eb\u3067\u3069\u306e\u3088\u3046\u306a\u8996\u899a\u7684\u306a\u7279\u5fb4\u304c\u3042\u308b\u306e\u304b\u3092\u63a2\u3063\u3066\u307f\u305f\u304f\u306a\u308b\u3067\u3057\u3087\u3046\u3002\uff08HT\u306e\uff09\u30c7\u30fc\u30bfAPI\u306e<a href=\"https:\/\/www.hathitrust.org\/documents\/hathitrust-data-api-v2_20150526.pdf\">\u6700\u65b0\u30c9\u30ad\u30e5\u30e1\u30f3\u30c8(2015)<\/a>\u3067\u306f\u3001\uff19\uff5e\uff11\uff10\u30da\u30fc\u30b8\u76ee\u306b<font style=\"color:#ff0000\" class=\"has-text-color\">htd:pfeat<\/font>\u3068\u547c\u3070\u308c\u308b\u30e1\u30bf\u30c7\u30fc\u30bf\u30aa\u30d6\u30b8\u30a7\u30af\u30c8\u306b\u3064\u3044\u3066\u8a18\u8ff0\u3055\u308c\u3066\u3044\u307e\u3059\u3002<font style=\"color:#ff0000\" class=\"has-text-color\">htd:pfeat<\/font>\u306f\u300cHathiTrust\u30c7\u30fc\u30bfAPI\uff1a\u30da\u30fc\u30b8\u306e\u7279\u5fb4\u300d\u306e\u7565\u3067\u3059\u3002<\/p>\n\n\n\n<pre class=\"wp-block-preformatted has-background has-very-light-gray-background-color\">  \u00b7&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <em><font style=\"color:#ff0000\" class=\"has-text-color\">htd:pfeat<\/font> - the page feature key (if available):<\/em>\n        o&nbsp;&nbsp; <em>CHAPTER_START<\/em>\n        o&nbsp;&nbsp; <em>COPYRIGHT<\/em>\n        o&nbsp;&nbsp; <em>FIRST_CONTENT_CHAPTER_START<\/em>\n        o&nbsp;&nbsp; <em>FRONT_COVER<\/em>\n        o&nbsp;&nbsp; <em>INDEX<\/em>\n        o&nbsp;&nbsp; <em>REFERENCES<\/em>\n        o&nbsp;&nbsp; <em>TABLE_OF_CONTENTS<\/em>\n        o&nbsp;&nbsp; <em>TITLE<\/em> <\/pre>\n\n\n\n<p><font style=\"color:#ff0000\" 
class=\"has-text-color\">hathitrust-api<\/font>\u30e9\u30c3\u30d1\u30fc\u304c\u884c\u3046\u306e\u306f\u3001HT\u8cc7\u6599\u306e\u5168\u30e1\u30bf\u30c7\u30fc\u30bf\u3092Python\u30aa\u30d6\u30b8\u30a7\u30af\u30c8\u3068\u3057\u3066\u5229\u7528\u3067\u304d\u308b\u3088\u3046\u306b\u3059\u308b\u3053\u3068\u3067\u3059\u3002\u8cc7\u6599\u306e\u8b58\u5225\u5b50\u304c\u3042\u308c\u3070\u3001\u305d\u306e\u30e1\u30bf\u30c7\u30fc\u30bf\u3092\u30ea\u30af\u30a8\u30b9\u30c8\u3057\u3066\u3001\u30da\u30fc\u30b8\u30b7\u30fc\u30b1\u30f3\u30b9\u304b\u3089\u30da\u30fc\u30b8\u30ec\u30d9\u30eb\u307e\u3067\u60c5\u5831\u3092\u6398\u308a\u4e0b\u3052\u308b\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002<font style=\"color:#ff0000\" class=\"has-text-color\">htd:pfeat<\/font>\u306e\u300c\u30ea\u30b9\u30c8\u300d\u306f\u672c\u306e\u5404\u30da\u30fc\u30b8\u306b\u5bfe\u5fdc\u3057\u3066\u304a\u308a\u3001\u7406\u8ad6\u7684\u306b\u306f\u305d\u306e\u30da\u30fc\u30b8\u306b\u5f53\u3066\u306f\u307e\u308b\u5168\u3066\u306e\u7279\u5fb4\u3092\u542b\u3093\u3067\u3044\u307e\u3059\u3002\u5b9f\u969b\u306b\u306f\u3001\u4e0a\u306b\u6319\u3052\u305f8\u3064\u306e\u7279\u5fb4\u30bf\u30b0\u3088\u308a\u3082\u304b\u306a\u308a\u591a\u304f\u306e\u30bf\u30b0\u304c\u3042\u308a\u307e\u3059\u3002\u79c1\u9054\u304c\u6271\u3046\u3053\u3068\u306b\u306a\u308b\u306e\u306f\u3001<font style=\"color:#ff0000\" class=\"has-text-color\">IMAGE_ON_PAGE<\/font>\u3068\u547c\u3070\u308c\u308b\u3082\u306e\u3067\u3001<font style=\"color:#ff0000\" 
class=\"has-text-color\">CHAPTER_START<\/font>\u306e\u3088\u3046\u306a\u69cb\u9020\u30bf\u30b0\u3088\u308a\u3082\u62bd\u8c61\u7684\u3067\u8996\u899a\u7684\u306a\u3082\u306e\u306b\u306a\u308a\u307e\u3059\u3002<\/p>\n\n\n\n<p>\u30df\u30b7\u30ac\u30f3\u5927\u5b66\u56f3\u66f8\u9928\u306e\u7814\u7a76\u53f8\u66f8\u306eTom\nBurton-West\u306f\u3001HathiTrust\u3068HathiTrust\u7814\u7a76\u30bb\u30f3\u30bf\u30fc\uff08HTRC\uff09\u3068\u7dca\u5bc6\u306b\u9023\u643a\u3092\u3057\u3066\u3044\u307e\u3059\u3002Tom\u306f\u79c1\u306b\u3001HathiTrust\u304c2008\u5e74\u306eHT\u8a2d\u7acb\u4ee5\u6765\u3001\u5bc6\u63a5\u306b\u9023\u643a\u3057\u3066\u3044\u308bGoogle\u304b\u3089\u3001<font style=\"color:#ff0000\" class=\"has-text-color\">htd:pfeat<\/font>\u306e\u60c5\u5831\u3092\u63d0\u4f9b\u3055\u308c\u3066\u3044\u308b\u3068\u30e1\u30fc\u30eb\u3067\u6559\u3048\u3066\u304f\u308c\u307e\u3057\u305f\u3002Google\u306e\u62c5\u5f53\u8005\u306fTom\u306b\u4ee5\u4e0b\u306e\u60c5\u5831\u3092\u5171\u6709\u3059\u308b\u8a31\u53ef\u3092\u4e0e\u3048\u3066\u304f\u308c\u307e\u3057\u305f\u3002<\/p>\n\n\n\n<p class=\"has-background has-very-light-gray-background-color\"><em>\u3053\u308c\u3089\u306e\u30bf\u30b0\u306f\u3001\u30d2\u30e5\u30fc\u30ea\u30b9\u30c6\u30a3\u30c3\u30af\u3001\u6a5f\u68b0\u5b66\u7fd2\u3001\u4eba\u9593\u306b\u3088\u308b\u30bf\u30b0\u4ed8\u3051\u306e\u7d44\u307f\u5408\u308f\u305b\u3067\u4f5c\u6210\u3055\u308c\u305f\u3082\u306e\u3067\u3059\u3002<\/em><\/p>\n\n\n\n<p>\u30d2\u30e5\u30fc\u30ea\u30b9\u30c6\u30a3\u30c3\u30af\u306e\u4f8b\u3068\u3057\u3066\u306f\u3001\u8cc7\u6599\u306e\u30da\u30fc\u30b8\u30b7\u30fc\u30b1\u30f3\u30b9\u306e\u6700\u521d\u306e\u8981\u7d20\u304c\u307b\u307c\u5e38\u306b<font style=\"color:#ff0000\" 
class=\"has-text-color\">FRONT_COVER<\/font>\u3060\u3068\u8003\u3048\u3089\u308c\u307e\u3059\u3002\u6a5f\u68b0\u5b66\u7fd2\u306f\u3001\u4f8b\u3048\u3070\u6d0b\u66f8\u306e\u6563\u6587\u3084\u5f6b\u523b\u306b\u5f6b\u3089\u308c\u308b\u5178\u578b\u7684\u306a\u6570\u884c\u3068\u753b\u50cf\u30c7\u30fc\u30bf\u3068\u3092\u8b58\u5225\u3059\u308b\u305f\u3081\u306e\u30e2\u30c7\u30eb\u3092\u8a13\u7df4\u3059\u308b\u305f\u3081\u306b\u4f7f\u7528\u3059\u308b\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002\u4eba\u306b\u3088\u308b\u30bf\u30b0\u4ed8\u3051\u306f\u3001\u624b\u52d5\u3067\u753b\u50cf\u306b\u30e9\u30d9\u30eb\u3092\u5272\u308a\u5f53\u3066\u308b\u3053\u3068\u3067\u3059\u3002EEBO\u3084ECCO\u306a\u3069\u306e\u30c7\u30fc\u30bf\u30d9\u30fc\u30b9\u3067\u633f\u7d75\u304c\u898b\u3089\u308c\u308b\u306e\u306f\u3001\u4eba\u9593\u306b\u3088\u308b\u30bf\u30b0\u4ed8\u3051\u306e\u4f8b\u3067\u3059\u3002<\/p>\n\n\n\n<p>Google\u304c\u300c\u6a5f\u68b0\u5b66\u7fd2\u300d\u3092\u4f7f\u3063\u305f\u3068\u3044\u3046\u306e\u306f\u5c11\u3057\u4e0d\u601d\u8b70\u306a\u611f\u3058\u304c\u3057\u307e\u3059\u3002Google\u304c\u305d\u306e\u624b\u6cd5\u3092\u516c\u958b\u3059\u308b\u307e\u3067\u3001\u3059\u3079\u3066\u306e\u8a73\u7d30\u3092\u77e5\u308b\u3053\u3068\u306f\u4e0d\u53ef\u80fd\u3067\u3059\u3002\u3057\u304b\u3057<font style=\"color:#ff0000\" 
class=\"has-text-color\">IMAGE_ON_PAGE<\/font>\u30bf\u30b0\u306f\u3001OCR\u51fa\u529b\u30d5\u30a1\u30a4\u30eb\u304b\u3089\u300cPicture\u300d\u30d6\u30ed\u30c3\u30af\u3092\u691c\u51fa\u3059\u308b\u3053\u3068\u3067\u6700\u521d\u306b\u63d0\u6848\u3055\u308c\u305f\u53ef\u80fd\u6027\u304c\u9ad8\u3044\u3067\u3059\uff08\u3053\u306e\u4eee\u8aac\u306b\u3064\u3044\u3066\u306f\u3001\u5f8c\u8ff0\u3059\u308bInternet\nArchive\u306e\u7bc0\u3067\u8aac\u660e\u3057\u307e\u3059\uff09\u3002\u305d\u306e\u5f8c\u3001\u3055\u3089\u306a\u308b\u30d5\u30a3\u30eb\u30bf\u30ea\u30f3\u30b0\u304c\u9069\u7528\u3055\u308c\u3066\u3044\u308b\u306e\u304b\u3082\u3057\u308c\u307e\u305b\u3093\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"code-walk-through\">\u30b3\u30fc\u30c9\u30fb\u30a6\u30a9\u30fc\u30af\u30b9\u30eb\u30fc<\/h3>\n\n\n\n<h4 class=\"wp-block-heading\" id=\"find-pictures\">\u25b6\ufe0e\u753b\u50cf\u3092\u898b\u3064\u3051\u308b<\/h4>\n\n\n\n<p>\u3053\u3053\u307e\u3067\u3001\u66f8\u7c4d\u30ea\u30b9\u30c8\u3092\u4f5c\u6210\u3059\u308b\u65b9\u6cd5\u3068\u3001\u30c7\u30fc\u30bfAPI\u3092\u4f7f\u3063\u3066\u30da\u30fc\u30b8\u30ec\u30d9\u30eb\u306e\u7d4c\u9a13\u7684\u306a\u7279\u5fb4\u3092\u542b\u3080\u30e1\u30bf\u30c7\u30fc\u30bf\u30aa\u30d6\u30b8\u30a7\u30af\u30c8\u3092\u53d6\u5f97\u3059\u308b\u65b9\u6cd5\u3092\u898b\u3066\u304d\u307e\u3057\u305f\u3002HT\u30ce\u30fc\u30c8\u30d6\u30c3\u30af\u306e\u30b3\u30a2\u95a2\u6570\u306f\u3001<font style=\"color:#ff0000\" 
class=\"has-text-color\">ht_picture_download(item_id,\nout_dir=None)<\/font>\u3068\u3044\u3046\u30b7\u30b0\u30cd\u30c1\u30e3\u304c\u3042\u308a\u307e\u3059\u3002\u30e6\u30cb\u30fc\u30af\u306a\u8b58\u5225\u5b50\u3068\u30aa\u30d7\u30b7\u30e7\u30f3\u306e\u4fdd\u5b58\u5148\u30c7\u30a3\u30ec\u30af\u30c8\u30ea\u3092\u4e0e\u3048\u308b\u3068\u3001\u3053\u306e\u95a2\u6570\u306f\u307e\u305aAPI\u304b\u3089\u66f8\u7c4d\u30e1\u30bf\u30c7\u30fc\u30bf\u3092\u53d6\u5f97\u3057\u3001JSON\u5f62\u5f0f\u306b\u5909\u63db\u3057\u307e\u3059\u3002\u6b21\u306b\u30da\u30fc\u30b8\u30b7\u30fc\u30b1\u30f3\u30b9\u3092\u30eb\u30fc\u30d7\u3057\u3001\uff08\u3082\u3057\u3042\u308c\u3070\uff09<font style=\"color:#ff0000\" class=\"has-text-color\">IMAGE_ON_PAGE<\/font>\u30bf\u30b0\u304c<font style=\"color:#ff0000\" class=\"has-text-color\">htd:pfeat<\/font>\u30ea\u30b9\u30c8\u306b\u3042\u308b\u304b\u3069\u3046\u304b\u3092\u78ba\u8a8d\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<pre class=\"wp-block-preformatted has-background has-very-light-gray-background-color\">  <em># API\u304b\u3089\u306e\u30e1\u30bf\u30c7\u30fc\u30bf\u3092json\u5f62\u5f0f\u3067\u8aad\u307f\u8fbc\u3080(HT\u30b3\u30ec\u30af\u30b7\u30e7\u30f3\u306e\u30e1\u30bf\u30c7\u30fc\u30bf\u3068\u306f\u7570\u306a\u308b)<\/em>\n meta <strong>=<\/strong> json<strong>.<\/strong>loads(data_api<strong>.<\/strong>getmeta(item_id, json<strong>=<\/strong>True))\n &nbsp;\n <em># \u30b7\u30fc\u30b1\u30f3\u30b9\u306f\u3001\u30b9\u30ad\u30e3\u30f3\u3055\u308c\u305f\u30a2\u30a4\u30c6\u30e0\u306e\u5404\u30da\u30fc\u30b8\u3092\u9806\u756a\u306b\u53d6\u5f97\u3057\u3001\u8ffd\u52a0\u60c5\u5831\u304c\u3042\u308c\u3070\u305d\u308c\u3082\u542b\u3081\u308b<\/em>\n sequence <strong>=<\/strong> meta[<font style=\"color:#ff0000\" class=\"has-text-color\">'htd:seqmap'<\/font>][0][<font style=\"color:#ff0000\" class=\"has-text-color\">'htd:seq'<\/font>]\n &nbsp;\n <em># 
\u633f\u7d75\u30da\u30fc\u30b8\u3092\u683c\u7d0d\u3059\u308b\u305f\u3081\u306e\u7a7a\u306e\u30ea\u30b9\u30c8\u3092\u4f5c\u6210<\/em>\n img_pages <strong>=<\/strong> []\n &nbsp;\n <em># try\/except\u30d6\u30ed\u30c3\u30af\u306f\"pfeats\"\u304c\u5b58\u5728\u3057\u306a\u3044\u5834\u5408\u3001\u3042\u308b\u3044\u306f\u3001\u30b7\u30fc\u30b1\u30f3\u30b9\u756a\u53f7\u304c\u6570\u5024\u3067\u306a\u3044\u5834\u5408\u3092\u51e6\u7406\u3059\u308b\u3082\u306e\u3067\u3059<\/em>\n <strong>for<\/strong> page <strong>in<\/strong> sequence:\n &nbsp;&nbsp;&nbsp; <strong>try<\/strong>:\n &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <strong>if<\/strong> <font style=\"color:#ff0000\" class=\"has-text-color\">'IMAGE_ON_PAGE' <\/font><strong>in<\/strong> page[<font style=\"color:#ff0000\" class=\"has-text-color\">'htd:pfeat'<\/font>]:\n &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; img_pages<strong>.<\/strong>append(int(page[<font style=\"color:#ff0000\" class=\"has-text-color\">'pseq'<\/font>]))\n &nbsp;&nbsp;&nbsp; <strong>except<\/strong> (<font style=\"color:#0000FF\" class=\"has-text-color\">KeyError<\/font>, <font style=\"color:#0000FF\" class=\"has-text-color\">TypeError<\/font>) <strong>as<\/strong> e:\n &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; <strong>continue<\/strong> <\/pre>\n\n\n\n<p>\u30c8\u30c3\u30d7\u30ec\u30d9\u30eb\u306e\u30aa\u30d6\u30b8\u30a7\u30af\u30c8\u304b\u3089\u6570\u6bb5\u968e\u6398\u308a\u4e0b\u3052\u306a\u3044\u3068\u3001\u53cd\u5fa9\u51e6\u7406\u3092\u884c\u3048\u308b<font style=\"color:#ff0000\" class=\"has-text-color\">htd:seq<\/font>\u30aa\u30d6\u30b8\u30a7\u30af\u30c8\u306b\u8fbf\u308a\u7740\u3051\u306a\u3044\u3053\u3068\u306b\u6ce8\u610f\u3057\u3066\u304f\u3060\u3055\u3044\u3002<\/p>\n\n\n\n<p>\u6355\u307e\u3048\u305f\u30442\u3064\u306e\u4f8b\u5916\u306f\u3001\u30da\u30fc\u30b8\u306b\u30da\u30fc\u30b8\u30ec\u30d9\u30eb\u306e\u7279\u5fb4\u304c\u306a\u3044\u5834\u5408\u306b\u8d77\u3053\u308b<font style=\"color:#ff0000\" 
class=\"has-text-color\">KeyError<\/font>\u3068\u3001\u4f55\u3089\u304b\u306e\u7406\u7531\u3067\u30da\u30fc\u30b8\u306e<font style=\"color:#ff0000\" class=\"has-text-color\">pseq<\/font>\u306e\u5024\u304c\u975e\u6570\u5024\u578b\u306e\u305f\u3081\u306b<font style=\"color:#ff0000\" class=\"has-text-color\">int\u578b<\/font>\u306b\u5909\u63db\u3067\u304d\u306a\u3044\u5834\u5408\u306b\u8d77\u3053\u308b<font style=\"color:#ff0000\" class=\"has-text-color\">TypeError<\/font>\u3067\u3059\u3002\u30da\u30fc\u30b8\u3067\u306a\u306b\u304b\u304a\u304b\u3057\u306a\u3053\u3068\u304c\u8d77\u3053\u3063\u305f\u5834\u5408\u306b\u306f\u3001\u6b21\u306e\u30da\u30fc\u30b8\u306b<font style=\"color:#ff0000\" class=\"has-text-color\">continue<\/font>\u3059\u308b\u3060\u3051\u3067\u3059\u3002\u8003\u3048\u65b9\u3068\u3057\u3066\u306f\u3001\u3067\u304d\u308b\u9650\u308a\u306e\u826f\u3044\u30c7\u30fc\u30bf\u3092\u53d6\u5f97\u3059\u308b\u3053\u3068\u306b\u3042\u308a\u307e\u3059\u3002\u30a2\u30a4\u30c6\u30e0\u306e\u30e1\u30bf\u30c7\u30fc\u30bf\u306e\u4e0d\u6574\u5408\u3084\u30ae\u30e3\u30c3\u30d7\u3092\u304d\u308c\u3044\u306b\u3059\u308b\u305f\u3081\u3067\u306f\u3042\u308a\u307e\u305b\u3093\u3002<\/p>\n\n\n\n<h4 class=\"wp-block-heading\" id=\"download-images\">\u25b6\ufe0e\u753b\u50cf\u3092\u30c0\u30a6\u30f3\u30ed\u30fc\u30c9\u3059\u308b<\/h4>\n\n\n\n<p><font style=\"color:#ff0000\" class=\"has-text-color\">img_pages<\/font>\u306b<font style=\"color:#ff0000\" class=\"has-text-color\">IMAGE_ON_PAGE<\/font>\u3067\u30bf\u30b0\u4ed8\u3051\u3055\u308c\u305f\u30da\u30fc\u30b8\u306e\u5b8c\u5168\u306a\u30ea\u30b9\u30c8\u304c\u542b\u307e\u308c\u3066\u3044\u308c\u3070\u3001\u305d\u308c\u3089\u306e\u30da\u30fc\u30b8\u3092\u30c0\u30a6\u30f3\u30ed\u30fc\u30c9\u3067\u304d\u307e\u3059\u3002<font style=\"color:#ff0000\" class=\"has-text-color\">ht_picture_download()<\/font>\u306b<font style=\"color:#ff0000\" 
class=\"has-text-color\">out_dir<\/font>\u304c\u6307\u5b9a\u3055\u308c\u3066\u3044\u306a\u3044\u5834\u5408\u3001\u3053\u306e\u95a2\u6570\u306f\u5358\u306b<font style=\"color:#ff0000\" class=\"has-text-color\">img_pages<\/font>\u306e\u30ea\u30b9\u30c8\u3092\u8fd4\u3057\u3001\u306a\u306b\u3082\u30c0\u30a6\u30f3\u30ed\u30fc\u30c9\u3057\u306a\u3044\u3053\u3068\u306b\u6ce8\u610f\u3057\u3066\u304f\u3060\u3055\u3044\u3002<\/p>\n\n\n\n<p>API\u30b3\u30fc\u30eb<font style=\"color:#ff0000\" class=\"has-text-color\">getpageimage()<\/font>\u306f\u3001\u30c7\u30d5\u30a9\u30eb\u30c8\u3067JPEG\u3092\u8fd4\u3057\u307e\u3059\u3002\u901a\u5e38\u306e\u65b9\u6cd5\u3067\u3001\u5358\u306bJPEG\u306e\u30d0\u30a4\u30c8\u914d\u5217\u3092\u30d5\u30a1\u30a4\u30eb\u306b\u66f8\u304d\u51fa\u3059\u3060\u3051\u3067\u3059\u3002\uff08<font style=\"color:#ff0000\" class=\"has-text-color\">out_dir<\/font>\u5185\u306e\uff09\u8cc7\u6599\u30b5\u30d6\u30d5\u30a9\u30eb\u30c0\u5185\u3067\u306f\u30011\u30da\u30fc\u30b8\u76ee\u306f<font style=\"color:#ff0000\" class=\"has-text-color\">1.jpg<\/font>\u3068\u3044\u3046\u3088\u3046\u306b\u540d\u524d\u3092\u4ed8\u3051\u3089\u308c\u307e\u3059\u3002<\/p>\n\n\n\n<p>\u8003\u616e\u3059\u3079\u304d\u306f\u3001API\u306e\u4f7f\u7528\u7387\u3067\u3059\u30021\u5206\u9593\u306b\u4f55\u767e\u3082\u306e\u30ea\u30af\u30a8\u30b9\u30c8\u3092\u3057\u3066\u30a2\u30af\u30bb\u30b9\u3092\u60aa\u7528\u3059\u308b\u3053\u3068\u306f\u907f\u3051\u305f\u3044\u3082\u306e\u3067\u3059\u3002\u5b89\u5168\u306e\u305f\u3081\u306b\u3001\u7279\u306b\u5927\u304d\u306a\u30b8\u30e7\u30d6\u3092\u5b9f\u884c\u3059\u308b\u5834\u5408\u306f\u3001\u5404\u30da\u30fc\u30b8\u306e\u30ea\u30af\u30a8\u30b9\u30c8\u3092\u884c\u3046\u524d\u306b2\u79d2\u5f85\u3061\u307e\u3059\u3002\u3053\u308c\u306f\u77ed\u671f\u7684\u306b\u306f\u30a4\u30e9\u30a4\u30e9\u3059\u308b\u304b\u3082\u3057\u308c\u307e\u305b\u3093\u304c\u3001API\u306e\u30b9\u30ed\u30c3\u30c8\u30eb\u3084\u4f7f\u7528\u505c\u6b62\u3092\u56de\u907f\u3059\u308b\u3053\u3068\u306b\u5f79\u7acb\u3061\
u307e\u3059\u3002<\/p>\n\n\n\n<pre class=\"wp-block-preformatted\"> for i, page in enumerate(img_pages):\n &nbsp;&nbsp;&nbsp; try:\n &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; # \u7c21\u5358\u306a\u51e6\u7406\u72b6\u6cc1\u306e\u78ba\u8a8d\u30e1\u30c3\u30bb\u30fc\u30b8\u306e\u51fa\u529b\n &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; print(\"[{}] Downloading page {} ({}\/{})\".format(item_id, \\\n &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; page, i+1, total_pages))\n &nbsp;\n &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; img = data_api.getpageimage(item_id, page)\n &nbsp;\n &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; # \u6ce8\u610f\uff1aout_dir \u304c None \u3067\u306f\u306a\u3044\u5834\u5408\u306b\u306e\u307f\u30eb\u30fc\u30d7\u306f\u5b9f\u884c\u3055\u308c\u307e\u3059\u3002\n &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; img_out = os.path.join(out_dir, str(page) + \".jpg\")\n &nbsp;\n &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; # \u753b\u50cf\u3092\u66f8\u304d\u51fa\u3059\n &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; with open(img_out, 'wb') as fp:\n &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; fp.write(img)\n &nbsp;\n &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; # API\u5229\u7528\u505c\u6b62\u306e\u56de\u907f\u3059\u308b\u305f\u3081\u306b2\u79d2\u9593\u3092\u631f\u3080\n &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; time.sleep(2)\n &nbsp;\n &nbsp;&nbsp;&nbsp; except Exception as e:\n &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; print(\"[{}] Error downloading page {}: {}\".format(item_id, page,e)) <\/pre>\n\n\n\n<h2 class=\"wp-block-heading\" id=\"internet-archive\">Internet\nArchive<\/h2>\n\n\n\n<h3 class=\"wp-block-heading\" 
id=\"api-access-1\">API\u30a2\u30af\u30bb\u30b9<\/h3>\n\n\n\n<p>API\u30c8\u30fc\u30af\u30f3\u3067\u306f\u306a\u304f\u3001Archive.org\u306e\u30a2\u30ab\u30a6\u30f3\u30c8\u306e\u30e1\u30fc\u30eb\u3068\u30d1\u30b9\u30ef\u30fc\u30c9\u3092\u4f7f\u3063\u3066\u3001Python API\u30e9\u30a4\u30d6\u30e9\u30ea\u306b\u63a5\u7d9a\u3057\u307e\u3059\u3002\u3053\u308c\u306b\u3064\u3044\u3066\u306f\u3001<a href=\"https:\/\/archive.org\/services\/docs\/api\/internetarchive\/quickstart.html\">\u30af\u30a4\u30c3\u30af\u30b9\u30bf\u30fc\u30c8\u30ac\u30a4\u30c9<\/a>\u3067\u8aac\u660e\u3057\u3066\u3044\u307e\u3059\u3002\u30a2\u30ab\u30a6\u30f3\u30c8\u3092\u6301\u3063\u3066\u3044\u306a\u3044\u5834\u5408\u306f\u3001\u300c\u4eee\u60f3\u56f3\u66f8\u9928\u30ab\u30fc\u30c9\uff08\u201cVirtual Library Card\u201d\uff09\u300d\u306b<a href=\"https:\/\/archive.org\/account\/login.createaccount.php\">\u767b\u9332<\/a>\u3057\u3066\u304f\u3060\u3055\u3044\u3002<\/p>\n\n\n\n<p><font style=\"color:#ff0000\" class=\"has-text-color\">internetarchive.ipynb<\/font>\u30ce\u30fc\u30c8\u30d6\u30c3\u30af\u306e\u6700\u521d\u306e\u30bb\u30eb\u306b\u3001\u6307\u793a\u3055\u308c\u305f\u3068\u304a\u308a\u306b\u3001\u8cc7\u683c\u60c5\u5831\u3092\u5165\u529b\u3057\u307e\u3059\u3002API\u3078\u306e\u8a8d\u8a3c\u3092\u884c\u3046\u305f\u3081\u306b\u30bb\u30eb\u3092\u5b9f\u884c\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\" 
id=\"create-volume-list-1\">\u8cc7\u6599\u30ea\u30b9\u30c8\u306e\u4f5c\u6210<\/h3>\n\n\n\n<p>IA\u306ePython\u30e9\u30a4\u30d6\u30e9\u30ea\u3092\u4f7f\u7528\u3057\u3066\u30af\u30a8\u30ea\u6587\u5b57\u5217\u3092\u9001\u4fe1\u3059\u308b\u3068\u3001\u201cidentifier\u201d\uff08\u8b58\u5225\u5b50\uff09\u3068\u3044\u3046\u5358\u8a9e\u304c\u30ad\u30fc\u3067\u5b9f\u969b\u306e\u8b58\u5225\u5b50\u304c\u30c7\u30fc\u30bf\u5024\u3068\u306a\u308b\u3001\u30ad\u30fc\u3068\u5024\u306e\u30da\u30a2\u306e\u30ea\u30b9\u30c8\u3092\u53d7\u3051\u53d6\u308b\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002\u30af\u30a8\u30ea\u306e\u69cb\u6587\u306b\u3064\u3044\u3066\u306f\u3001IA\u306e<a href=\"https:\/\/archive.org\/advancedsearch.php\">\u9ad8\u5ea6\u691c\u7d22\u30da\u30fc\u30b8<\/a>\u3067\u8aac\u660e\u3057\u3066\u3044\u307e\u3059\u3002\u201cdate\u201d\uff08\u65e5\u4ed8\uff09\u3001\u201cmediatype\u201d\uff08\u30e1\u30c7\u30a3\u30a2\u30bf\u30a4\u30d7\uff09\u306e\u3088\u3046\u306a\u30ad\u30fc\u30ef\u30fc\u30c9\u306e\u5f8c\u306b\u30b3\u30ed\u30f3\u3092\u7d9a\u3051\u3001\u305d\u306e\u30d1\u30e9\u30e1\u30fc\u30bf\u306b\u5272\u308a\u5f53\u3066\u305f\u3044\u5024\u3092\u6307\u5b9a\u3057\u307e\u3059\u3002\u4f8b\u3048\u3070\u3001\u79c1\u306f\uff08\u52d5\u753b\u306a\u3069\u3067\u306f\u306a\u304f\uff09\u300c\u30c6\u30ad\u30b9\u30c8\u300d\u306e\u7d50\u679c\u3060\u3051\u3092\u8868\u793a\u3057\u305f\u3044\u3067\u3059\u3002\u4f7f\u7528\u3057\u3088\u3046\u3068\u3059\u308b\u30d1\u30e9\u30e1\u30fc\u30bf\u3068\u30aa\u30d7\u30b7\u30e7\u30f3\u304cIA\u306e\u691c\u7d22\u6a5f\u80fd\u3067\u30b5\u30dd\u30fc\u30c8\u3055\u308c\u3066\u3044\u308b\u3053\u3068\u3092\u78ba\u8a8d\u3057\u3066\u304f\u3060\u3055\u3044\u3002\u305d\u3046\u3057\u306a\u3044\u3068\u3001\u6b20\u3051\u305f\u7d50\u679c\u3084\u5947\u5999\u306a\u7d50\u679c\u304c\u5f97\u3089\u308c\u3001\u307e\u305f\u3001\u305d\u306e\u7406\u7531\u304c\u5206\u304b\u3089\u306a\u304f\u306a\u308b\u53ef\u80fd\u6027\u304c\u3042\u308a\u307e\u3059\u3002<\/p>\n\n\n\n<p>\u30ce\u3
0fc\u30c8\u30d6\u30c3\u30af\u3067\u306f\u3001\u79c1\u306fIA\u306eID\u30ea\u30b9\u30c8\u3092\u751f\u6210\u3057\u3066\u3044\u307e\u3059\uff1a<\/p>\n\n\n\n<pre class=\"wp-block-preformatted\"> # \u30b5\u30f3\u30d7\u30eb\u691c\u7d22\uff082\u3064\u306e\u7d50\u679c\u304c\u5f97\u3089\u308c\u307e\u3059\uff09\n query = \"peter parley date:[1825 TO 1830] mediatype:texts\"\n vol_ids = [result['identifier'] for result in ia.search_items(query)] <\/pre>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"visual-feature-picture-blocks\">\u8996\u899a\u7684\u306a\u7279\u5fb4\uff1aPicture\u30d6\u30ed\u30c3\u30af<\/h3>\n\n\n\n<p>Internet\nArchive\u306f\u30da\u30fc\u30b8\u30ec\u30d9\u30eb\u306e\u7279\u5fb4\u306f\u4e00\u5207\u516c\u958b\u3057\u3066\u3044\u307e\u305b\u3093\u3002\u305d\u306e\u4ee3\u308f\u308a\u306b\u3001\u30c7\u30b8\u30bf\u30eb\u5316\u306e\u904e\u7a0b\u3067\u5f97\u3089\u308c\u305f\u591a\u304f\u306e\u751f\u306e\u30d5\u30a1\u30a4\u30eb\u3092\u30e6\u30fc\u30b6\u30fc\u304c\u5229\u7528\u3067\u304d\u308b\u3088\u3046\u306b\u3057\u3066\u3044\u307e\u3059\u3002\u79c1\u9054\u306e\u76ee\u7684\u306b\u6700\u3082\u91cd\u8981\u306a\u306e\u306f\u3001Abbyy\nXML\u30d5\u30a1\u30a4\u30eb\u3067\u3059\u3002Abbyy\u793e\u306f\u30ed\u30b7\u30a2\u306e\u4f01\u696d\u3067\u3001FineReader\u3068\u3044\u3046\u30bd\u30d5\u30c8\u30a6\u30a7\u30a2\u306fOCR\u5e02\u5834\u3092\u5e2d\u5dfb\u3057\u3066\u3044\u307e\u3059\u3002<\/p>\n\n\n\n<p>FineReader\u306e\u6700\u8fd1\u306e\u30d0\u30fc\u30b8\u30e7\u30f3\u306f\u3059\u3079\u3066\u3001\u30b9\u30ad\u30e3\u30f3\u3057\u305f\u6587\u66f8\u306e\u5404\u30da\u30fc\u30b8\u306b\u3001\u7570\u306a\u308b\u300c\u30d6\u30ed\u30c3\u30af\u300d\u3092\u95a2\u9023\u4ed8\u3051\u308b<a href=\"https:\/\/en.wikipedia.org\/wiki\/XML\">XML\u6587\u66f8<\/a>\u3092\u751f\u6210\u3057\u3066\u3044\u307e\u3059\u3002\u6700\u3082\u4e00\u822c\u7684\u306a\u30d6\u30ed\u30c3\u30af\u306f<font style=\"color:#ff0000\" class=\"has-text-color\">Text<\/font>\u3067\u3059\u304c\u3001<font style=\"color:#ff0000\" 
class=\"has-text-color\">Picture<\/font>\u30d6\u30ed\u30c3\u30af\u3082\u3042\u308a\u307e\u3059\u3002\u3053\u3053\u3067\u306f\u3001IA\u306eAbbyy XML\u30d5\u30a1\u30a4\u30eb\u304b\u3089\u53d6\u308a\u51fa\u3057\u305f\u30d6\u30ed\u30c3\u30af\u306e\u4f8b\u3092\u793a\u3057\u307e\u3059\u3002\u5de6\u4e0a\uff08\u300ct\u300d\u3068\u300cl\u300d\uff09\u3068\u53f3\u4e0b\uff08\u300cb\u300d\u3068\u300cr\u300d\uff09\u306e\u9685\u3067\u3001\u5341\u5206\u9577\u65b9\u5f62\u306e\u30d6\u30ed\u30c3\u30af\u9818\u57df\u3092\u7279\u5b9a\u3067\u304d\u307e\u3059\u3002<\/p>\n\n\n\n<pre class=\"wp-block-preformatted has-background has-very-light-gray-background-color\"><font style=\"color:#0000ff\" class=\"has-text-color\">  &lt;block <font style=\"color:#006500\" class=\"has-text-color\">blockType=<\/font><font style=\"color:#ff0000\" class=\"has-text-color\">\"Picture\"<\/font> <font style=\"color:#006500\" class=\"has-text-color\">l=<\/font><font style=\"color:#ff0000\" class=\"has-text-color\">\"586\"<\/font> <font style=\"color:#006500\" class=\"has-text-color\">t=<\/font><font style=\"color:#ff0000\" class=\"has-text-color\">\"1428\"<\/font> <font style=\"color:#006500\" class=\"has-text-color\">r=<\/font><font style=\"color:#ff0000\" class=\"has-text-color\">\"768\"<\/font> <font style=\"color:#006500\" class=\"has-text-color\">b=<\/font><font style=\"color:#ff0000\" class=\"has-text-color\">\"1612\"<\/font>&gt;\n &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; &lt;region&gt;&lt;rect <font style=\"color:#006500\" class=\"has-text-color\">l=<\/font><font style=\"color:#ff0000\" class=\"has-text-color\">\"586\"<\/font> <font style=\"color:#006500\" class=\"has-text-color\">t=<\/font><font style=\"color:#ff0000\" class=\"has-text-color\">\"1428\"<\/font> <font style=\"color:#006500\" class=\"has-text-color\">r=<\/font><font style=\"color:#ff0000\" class=\"has-text-color\">\"768\"<\/font> <font style=\"color:#006500\" class=\"has-text-color\">b=<\/font><font style=\"color:#ff0000\" 
class=\"has-text-color\">\"1612\"<\/font>&gt;&lt;\/rect&gt;&lt;\/region&gt;\n &lt;\/block&gt; <\/font><\/pre>\n\n\n\n<p>HT\u3067<font style=\"color:#ff0000\" class=\"has-text-color\">IMAGE_ON_PAGE<\/font>\u30bf\u30b0\u3092\u63a2\u3059\u306e\u3068\u540c\u7b49\u306eIA\u3067\u306e\u65b9\u6cd5\u306f\u3001Abbyy\nXML\u30d5\u30a1\u30a4\u30eb\u3092\u89e3\u6790\u3057\u3001\u5404\u30da\u30fc\u30b8\u306e\u53cd\u5fa9\u51e6\u7406\u3092\u3059\u308b\u3053\u3068\u3067\u3059\u3002\u305d\u306e\u30da\u30fc\u30b8\u306b1\u3064\u3067\u3082<font style=\"color:#ff0000\" class=\"has-text-color\">Picture<\/font>\u30d6\u30ed\u30c3\u30af\u304c\u3042\u308c\u3070\u3001\u305d\u306e\u30da\u30fc\u30b8\u306f\u753b\u50cf\u304c\u542b\u307e\u308c\u3066\u3044\u308b\u53ef\u80fd\u6027\u304c\u3042\u308b\u3068\u3044\u3046\u30d5\u30e9\u30b0\u304c\u7acb\u3066\u3089\u308c\u307e\u3059\u3002<\/p>\n\n\n\n<p>HT\u306e<font style=\"color:#ff0000\" class=\"has-text-color\">IMAGE_ON_PAGE<\/font>\u306e\u7279\u5fb4\u3067\u306f\u753b\u50cf\u306e\u300c\u4f4d\u7f6e\u300d\u306b\u3064\u3044\u3066\u306e\u60c5\u5831\u306f\u542b\u307e\u308c\u3066\u3044\u306a\u3044\u304c\u3001XML\u30d5\u30a1\u30a4\u30eb\u306e<font style=\"color:#ff0000\" 
class=\"has-text-color\">Picture<\/font>\u30d6\u30ed\u30c3\u30af\u306f\u30da\u30fc\u30b8\u4e0a\u306e\u9577\u65b9\u5f62\u306e\u9818\u57df\u306b\u95a2\u9023\u4ed8\u3051\u3089\u308c\u3066\u3044\u308b\u3002\u3057\u304b\u3057FineReader\u306f\u897f\u6d0b\u306e\u6587\u5b57\u30bb\u30c3\u30c8\u304b\u3089\u306e\u6587\u5b57\u3092\u8a8d\u8b58\u3059\u308b\u3053\u3068\u306b\u7279\u5316\u3057\u3066\u3044\u308b\u306e\u3067\u3001\u753b\u50cf\u306e\u9818\u57df\u3092\u7279\u5b9a\u3059\u308b\u7cbe\u5ea6\u306f\u306f\u308b\u304b\u306b\u4f4e\u3044\u3002Leetaru\u306e\u30d7\u30ed\u30b8\u30a7\u30af\u30c8\uff08\u300c\u6982\u8981\u300d\u3092\u53c2\u7167\uff09\u3067\u306f\u3001\u5ea7\u6a19\u3092\u4f7f\u3063\u3066\u633f\u7d75\u3092\u30c8\u30ea\u30df\u30f3\u30b0\u3057\u3066\u3044\u307e\u3057\u305f\u304c\u3001\u3053\u306e\u30ec\u30c3\u30b9\u30f3\u3067\u306f\u5358\u306b\u30da\u30fc\u30b8\u5168\u4f53\u3092\u30c0\u30a6\u30f3\u30ed\u30fc\u30c9\u3059\u308b\u3053\u3068\u306b\u3057\u307e\u3057\u305f\u3002<\/p>\n\n\n\n<p>\u3053\u306e\u30ec\u30c3\u30b9\u30f3\u306e\u77e5\u7684\u306a\u697d\u3057\u307f\u306e\u4e00\u3064\u306f\u3001\u30ce\u30a4\u30ba\u306e\u591a\u3044\u30c7\u30fc\u30bf\u30bb\u30c3\u30c8\uff08\u3064\u307e\u308a\u3001OCR\u306e\u30d6\u30ed\u30c3\u30af\u30bf\u30b0\uff09\u3092\u3001\u5358\u8a9e\u3067\u306f\u306a\u304f\u633f\u7d75\u3092\u8a8d\u8b58\u3059\u308b\u305f\u3081\u306b\u5229\u7528\u3059\u308b\u3068\u3044\u3046\u3001\u610f\u56f3\u3055\u308c\u3066\u3044\u306a\u3044\u76ee\u7684\u306e\u305f\u3081\u306b\u4f7f\u3046\u3053\u3068\u306b\u3042\u308a\u307e\u3059\u3002\u5c06\u6765\u7684\u306b\u306f\u3001\u66f8\u7c4d\u5185\u306e\u5168\u3066\u306e\u30da\u30fc\u30b8\u753b\u50cf\u306b\u30c7\u30a3\u30fc\u30d7\u30e9\u30fc\u30cb\u30f3\u30b0\u30e2\u30c7\u30eb\u3092\u5b9f\u884c\u3057\u3001\u5e0c\u671b\u3059\u308b\u30bf\u30a4\u30d7\u306e\u633f\u7d75\u3092\u9078\u3073\u51fa\u3059\u3053\u3068\u304c\u30b3\u30f3\u30d4\u30e5\u30fc\u30bf\u3067\u53ef\u80fd\u306b\u306a\u308b\u3067\u3057\u3087\u3046\u3002\u3057\u304b\u30
57\u3001\u307b\u3068\u3093\u3069\u306e\u8cc7\u6599\u306e\u307b\u3068\u3093\u3069\u306e\u30da\u30fc\u30b8\u306b\u306f\u633f\u7d75\u304c\u306a\u3044\u305f\u3081\u3001\u3053\u308c\u306f\u8a08\u7b97\u30b3\u30b9\u30c8\u306e\u304b\u304b\u308b\u4f5c\u696d\u3067\u3059\u3002\u4eca\u306e\u3068\u3053\u308d\u306f\u3001OCR\u306e\u8a8d\u8b58\u904e\u7a0b\u304b\u3089\u5f97\u3089\u308c\u305f\u65e2\u5b58\u30c7\u30fc\u30bf\u3092\u6d3b\u7528\u3059\u308b\u65b9\u304c\u7406\u306b\u9069\u3063\u3066\u3044\u307e\u3059\u3002<\/p>\n\n\n\n<p>OCR\u81ea\u4f53\u304c\u3069\u306e\u3088\u3046\u306b\u52d5\u4f5c\u3057\u3001\u30b9\u30ad\u30e3\u30f3\u30d7\u30ed\u30bb\u30b9\u3068\u76f8\u4e92\u4f5c\u7528\u3059\u308b\u304b\u306b\u3064\u3044\u3066\u306e\u8a73\u7d30\u306f\u3001Mila Oiva\u306eProgramming Historian\u306e\u30ec\u30c3\u30b9\u30f3\u300c<a href=\"https:\/\/programminghistorian.org\/en\/lessons\/retired\/OCR-with-Tesseract-and-ScanTailor\">OCR with Tesseract and ScanTailor<\/a>\u300d\u3092\u53c2\u7167\u3057\u3066\u304f\u3060\u3055\u3044\u3002\u30a8\u30e9\u30fc\u306f\u3001\u3086\u304c\u307f\u3084\u4eba\u70ba\u7684\u306a\u5f71\u97ff\u306a\u3069\u591a\u304f\u306e\u554f\u984c\u306b\u3088\u3063\u3066\u751f\u3058\u3048\u307e\u3059\u3002\u3053\u308c\u3089\u306e\u30a8\u30e9\u30fc\u306f\u3001\u7d50\u679c\u7684\u306b\u300cPicture\u300d\u30d6\u30ed\u30c3\u30af\u306e\u4fe1\u983c\u6027\u3068\u7cbe\u5ea6\u306b\u5f71\u97ff\u3092\u4e0e\u3048\u308b\u3053\u3068\u306b\u306a\u308a\u307e\u3059\u3002\u591a\u304f\u306e\u5834\u5408\u3001Abbyy\u306f\u7a7a\u767d\u30da\u30fc\u30b8\u3084\u5909\u8272\u3057\u305f\u30da\u30fc\u30b8\u3092\u633f\u7d75\uff08\u753b\u50cf\u9818\u57df\uff09\u3060\u3068\u63a8\u5b9a\u3057\u307e\u3059\u3002\u3053\u308c\u3089\u306e\u4e0d\u6b63\u78ba\u306a\u30d6\u30ed\u30c3\u30af\u30bf\u30b0\u306f\u671b\u307e\u3057\u304f\u306a\u3044\u3082\u306e\u3067\u3059\u304c\u3001\u518d\u8a13\u7df4\u3055\u305b\u305f\u7573\u307f\u8fbc\u307f\u30cb\u30e5\u30fc\u30e9\u30eb\u30cd\u30c3\u30c8\u30ef\u30fc\u30af\u306b\u3088\u30
63\u3066\u51e6\u7406\u3059\u308b\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002\u3053\u306e\u30ec\u30c3\u30b9\u30f3\u3067\u30c0\u30a6\u30f3\u30ed\u30fc\u30c9\u3059\u308b\u30da\u30fc\u30b8\u753b\u50cf\u306f\u3001\u30af\u30ea\u30fc\u30f3\u3067\u5229\u7528\u53ef\u80fd\u306a\u6b74\u53f2\u7684\u306a\u633f\u7d75\u30c7\u30fc\u30bf\u30bb\u30c3\u30c8\u3092\u5f97\u308b\u305f\u3081\u306e\u9577\u3044\u30d7\u30ed\u30bb\u30b9\u306e\u6700\u521d\u306e\u6bb5\u968e\u3068\u8003\u3048\u308b\u3088\u3046\u306b\u3057\u3066\u304f\u3060\u3055\u3044\u3002<\/p>\n\n\n\n<h3 class=\"wp-block-heading\" id=\"code-walk-through-1\">\u30b3\u30fc\u30c9\u30fb\u30a6\u30a9\u30fc\u30af\u30b9\u30eb\u30fc<\/h3>\n\n\n\n<h4 class=\"wp-block-heading\" id=\"find-pictures-1\">\u25b6\ufe0e\u753b\u50cf\u3092\u898b\u3064\u3051\u308b<\/h4>\n\n\n\n<p>HT\u3068\u540c\u69d8\u3001IA\u306e\u30b3\u30a2\u95a2\u6570\u306f<font style=\"color:#ff0000\" class=\"has-text-color\">ia_picture_download(item_id,\nout_dir=None)<\/font>\u3067\u3059\u3002<\/p>\n\n\n\n<p>\u3053\u308c\u306f\u30d5\u30a1\u30a4\u30eb\u306e\u5165\u51fa\u529b\u3092\u4f34\u3046\u306e\u3067\u3001<font style=\"color:#ff0000\" class=\"has-text-color\">img_pages<\/font>\u30ea\u30b9\u30c8\u3092\u5f97\u308b\u306b\u306fHT\u306e\u5834\u5408\u3088\u308a\u8907\u96d1\u3067\u3059\u3002\uff08\u30e9\u30a4\u30d6\u30e9\u30ea\u3068\u5171\u306b\u30a4\u30f3\u30b9\u30c8\u30fc\u30eb\u3055\u308c\u308b\uff09\u30b3\u30de\u30f3\u30c9\u30e9\u30a4\u30f3\u30e6\u30fc\u30c6\u30a3\u30ea\u30c6\u30a3\u306e<font style=\"color:#ff0000\" 
class=\"has-text-color\">ia<\/font>\u3092\u4f7f\u3046\u3068\u3001\u8cc7\u6599\u306b\u95a2\u3059\u308b\u5229\u7528\u53ef\u80fd\u306a\u30e1\u30bf\u30c7\u30fc\u30bf\u30d5\u30a1\u30a4\u30eb\u3092\u77e5\u308b\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002\u3054\u304f\u5c11\u6570\u306e\u4f8b\u5916\u3092\u9664\u3044\u3066\u3001Internet\nArchive\u4e0a\u306e\u30e1\u30c7\u30a3\u30a2\u30bf\u30a4\u30d7\u306e\u30c6\u30ad\u30b9\u30c8\u3092\u6301\u3064\u30dc\u30ea\u30e5\u30fc\u30e0\u3067\u306f\u3001\u30d5\u30a9\u30fc\u30de\u30c3\u30c8\u304c\u300cAbbyy\nGZ\u300d\u306e\u30d5\u30a1\u30a4\u30eb\u304c\u5229\u7528\u3067\u304d\u308b\u306f\u305a\u3067\u3059\u3002<\/p>\n\n\n\n<p>\u3053\u308c\u3089\u306e\u30d5\u30a1\u30a4\u30eb\u306f\u3001\u305f\u3068\u3048\u5727\u7e2e\u3055\u308c\u3066\u3044\u305f\u3068\u3057\u3066\u3082\u3001\u7c21\u5358\u306b\u6570\u767e\u30e1\u30ac\u30d0\u30a4\u30c8\u306e\u30b5\u30a4\u30ba\u306b\u306a\u308b\u3053\u3068\u304c\u3042\u308a\u307e\u3059\u3002\u30dc\u30ea\u30e5\u30fc\u30e0\u306bAbbyy\u30d5\u30a1\u30a4\u30eb\u304c\u3042\u308c\u3070\u3001\u305d\u306e\u540d\u524d\u3092\u53d6\u5f97\u3057\u3066\u30c0\u30a6\u30f3\u30ed\u30fc\u30c9\u3057\u307e\u3059\u3002<font style=\"color:#ff0000\" class=\"has-text-color\">ia.download()<\/font>\u30b3\u30fc\u30eb\u3067\u306f\u3001\u30d5\u30a1\u30a4\u30eb\u304c\u65e2\u306b\u5b58\u5728\u3059\u308b\u5834\u5408\u306f\u30ea\u30af\u30a8\u30b9\u30c8\u3092\u7121\u8996\u3057\u3001\u5b58\u5728\u3057\u306a\u3044\u5834\u5408\u306f\u5165\u308c\u5b50\u306b\u306a\u3063\u305f\u30c7\u30a3\u30ec\u30af\u30c8\u30ea\u3092\u4f5c\u3089\u305a\u306b\u30c0\u30a6\u30f3\u30ed\u30fc\u30c9\u3059\u308b\u305f\u3081\u306e\u4fbf\u5229\u306a\u30d1\u30e9\u30e1\u30fc\u30bf\u3092\u4f7f\u7528\u3057\u3066\u3044\u307e\u3059\u3002\u5bb9\u91cf\u3092\u7bc0\u7d04\u3059\u308b\u305f\u3081\u306b\u3001\u30d5\u30a1\u30a4\u30eb\u3092\u89e3\u6790\u3057\u305f\u5f8c\u306b\nAbbyy \u30d5\u30a1\u30a4\u30eb\u3092\u524a\u9664\u3057\u3066\u3044\u307e\u3059\u3002<\/p>\n\n\n\n<pre 
class=\"wp-block-preformatted has-background has-very-light-gray-background-color\"><font style=\"color:#999988\" class=\"has-text-color\"><em># \u5229\u7528\u53ef\u80fd\u306a\u30e1\u30bf\u30c7\u30fc\u30bf\u30d5\u30a9\u30fc\u30de\u30c3\u30c8\u3092\u78ba\u8a8d\u3059\u308b\u305f\u3081\u3001\u30b3\u30de\u30f3\u30c9\u30e9\u30a4\u30f3\u30af\u30e9\u30a4\u30a2\u30f3\u30c8\u3092\u4f7f\u7528\u3057\u307e\u3059\u3002:<\/em>\n <em># `ia metadata formats VOLUME_ID`<\/em>\n &nbsp;\n <em># \u3053\u306e\u30ec\u30c3\u30b9\u30f3\u3067\u306fAbbyy\u30d5\u30a1\u30a4\u30eb\u3060\u3051\u304c\u5fc5\u8981\u3067\u3059\u3002<\/em><\/font>\n returned_files <strong>=<\/strong> <font style=\"color:#0000ff\" class=\"has-text-color\">list<\/font>(ia<strong>.<\/strong>get_files(item_id, formats<strong>=<\/strong>[<font style=\"color:#ff0000\" class=\"has-text-color\">\"Abbyy GZ\"<\/font>]))\n &nbsp;\n <font style=\"color:#999988\" class=\"has-text-color\"><em># \u4f55\u304b\u3057\u3089\u8fd4\u3055\u308c\u305f\u3053\u3068\u3092\u78ba\u8a8d\u3057\u307e\u3059<\/em><\/font>\n <strong>if<\/strong> <font style=\"color:#0000ff\" class=\"has-text-color\">len<\/font>(returned_files) <strong>&gt;<\/strong> <font style=\"color:#0000ff\" class=\"has-text-color\">0:<\/font>\n &nbsp;&nbsp;&nbsp; abbyy_file <strong>=<\/strong> returned_files[<font style=\"color:#0000ff\" class=\"has-text-color\">0<\/font>]<strong>.<\/strong>name\n <strong>else<\/strong>:\n &nbsp;&nbsp;&nbsp; <strong>print<\/strong>(<font style=\"color:#ff0000\" class=\"has-text-color\">\"[{}] Could not get Abbyy file\"<\/font><strong>.<\/strong><font style=\"color:#0000ff\" class=\"has-text-color\">format<\/font>(item_id))\n &nbsp;&nbsp;&nbsp; <strong>return<\/strong> <font style=\"color:#999988\" class=\"has-text-color\">None<\/font>\n &nbsp;\n <font style=\"color:#999988\" class=\"has-text-color\"><em># Abbyy\u30d5\u30a1\u30a4\u30eb\u3092CWD\u3078\u30c0\u30a6\u30f3\u30ed\u30fc\u30c9\u3057\u307e\u3059<\/em><\/font>\n 
ia<strong>.<\/strong>download(item_id, formats<strong>=<\/strong>[<font style=\"color:#ff0000\" class=\"has-text-color\">\"Abbyy GZ\"<\/font>], ignore_existing<strong>=<\/strong><font style=\"color:#999988\" class=\"has-text-color\">True<\/font>, \\\n &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; destdir<strong>=<\/strong>os<strong>.<\/strong>getcwd(), no_directory<strong>=<\/strong><font style=\"color:#999988\" class=\"has-text-color\">True<\/font>) <\/pre>\n\n\n\n<p>\u30d5\u30a1\u30a4\u30eb\u3092\u53d6\u5f97\u3057\u305f\u3089\u3001Python\u306e\u6a19\u6e96\u30e9\u30a4\u30d6\u30e9\u30ea\u3092\u4f7f\u3063\u3066XML\u3092\u89e3\u6790\u3059\u308b\u5fc5\u8981\u304c\u3042\u308a\u307e\u3059\u3002\u5727\u7e2e\u30d5\u30a1\u30a4\u30eb\u3092<font style=\"color:#ff0000\" class=\"has-text-color\">gzip<\/font>\u30e9\u30a4\u30d6\u30e9\u30ea\u3067\u76f4\u63a5\u958b\u3051\u3089\u308c\u308b\u3053\u3068\u3092\u5229\u7528\u3057\u307e\u3059\u3002Abbyy\u30d5\u30a1\u30a4\u30eb\u306f0\u30aa\u30ea\u30b8\u30f3\uff08zero-indexed\uff09\u306e\u305f\u3081\u3001\u30b9\u30ad\u30e3\u30f3\u30b7\u30fc\u30af\u30a8\u30f3\u30b9\u306e\u6700\u521d\u306e\u30da\u30fc\u30b8\u306e\u30a4\u30f3\u30c7\u30c3\u30af\u30b9\u306f0\u3068\u306a\u3063\u3066\u3044\u307e\u3059\u3002\u3057\u304b\u3057\u30010\u3067\u306fIA\u304b\u3089\u30ea\u30af\u30a8\u30b9\u30c8\u3067\u304d\u306a\u3044\u305f\u3081\u30010\u3092\u9664\u53bb\u3057\u306a\u3051\u308c\u3070\u306a\u308a\u307e\u305b\u3093\u3002IA\u304c0\u30a4\u30f3\u30c7\u30c3\u30af\u30b9\u3092\u9664\u5916\u3059\u308b\u3053\u3068\u306f\u3001\u3069\u3053\u306b\u3082\u66f8\u304b\u308c\u3066\u304a\u3089\u305a\u3001\u79c1\u306f\u3053\u308c\u3092\u8a66\u884c\u932f\u8aa4\u3067\u767a\u898b\u3057\u307e\u3057\u305f\u3002\u3082\u3057\u8aac\u660e\u3057\u306b\u304f\u3044\u30a8\u30e9\u30fc\u30e1\u30c3\u30bb\u30fc\u30b8\u304c\u8868\u793a\u3055\u308c\u305f\u3089\u3001\u30bd\u30fc\u30b9\u3092\u63a2\u3057\u51fa\u3057\u3066\u3001\u4f3c\u305f\u7d4c\u9a13\u306e\u3042\u308b\u4eba\
u3084\u7d44\u7e54\u81ea\u4f53\u306b\u52a9\u3051\u3092\u6c42\u3081\u308b\u3053\u3068\u3092\u6050\u308c\u306a\u3044\u3067\u304f\u3060\u3055\u3044\u3002<\/p>\n\n\n\n<pre class=\"wp-block-preformatted\"> # \u5c11\u306a\u304f\u3068\u30821\u3064\u306e\u753b\u50cf\u30d6\u30ed\u30c3\u30af\u3092\u6301\u3064\u30da\u30fc\u30b8\u3092\u53ce\u96c6\u3059\u308b\n img_pages = []\n &nbsp;\n with gzip.open(abbyy_file) as fp:\n &nbsp;&nbsp;&nbsp; tree = ET.parse(fp)\n &nbsp;&nbsp;&nbsp; document = tree.getroot()\n &nbsp;&nbsp;&nbsp; for i, page in enumerate(document):\n &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; for block in page:\n &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; try:\n &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; if block.attrib['blockType'] == 'Picture':\n &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; img_pages.append(i)\n &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; break\n &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; except KeyError:\n &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; continue\n &nbsp;\n # 0\u306fIA\u3078\u306eGET\u30ea\u30af\u30a8\u30b9\u30c8\u3092\u884c\u3046\u305f\u3081\u306e\u6709\u52b9\u306a\u30da\u30fc\u30b8\u3067\u306f\u3042\u308a\u307e\u305b\u3093\u304c\u3001# \u6642\u3005Abbyy\u306e\u5727\u7e2e\u30d5\u30a1\u30a4\u30eb\u306b\u3042\u308a\u307e\u3059\n &nbsp;\n img_pages = [page for page in img_pages if page &gt; 0]\n &nbsp;\n # \u30c0\u30a6\u30f3\u30ed\u30fc\u30c9\u306e\u9032\u884c\u72b6\u6cc1\u306e\u30c8\u30e9\u30c3\u30ad\u30f3\u30b0\u306e\u305f\u3081\u306e\u51e6\u7406\n total_pages = len(img_pages)\n &nbsp;\n # 
OCR\u30d5\u30a1\u30a4\u30eb\u306f\u5de8\u5927\u306a\u306e\u3067\u3001\u30da\u30fc\u30b8\u30ea\u30b9\u30c8\u304c\u3067\u304d\u305f\u3089\u524a\u9664\u3057\u3066\u304f\u3060\u3055\u3044\u3002\n os.remove(abbyy_file) <\/pre>\n\n\n\n<h4 class=\"wp-block-heading\" id=\"download-images-1\">\u25b6\ufe0e\u753b\u50cf\u3092\u30c0\u30a6\u30f3\u30ed\u30fc\u30c9\u3059\u308b<\/h4>\n\n\n\n<p>IA\u306ePython\u30e9\u30c3\u30d1\u30fc\u306f\u3001\u8907\u6570\u30da\u30fc\u30b8\u306e\u30c0\u30a6\u30f3\u30ed\u30fc\u30c9\u306e\u307f\u304c\u53ef\u80fd\u3067\u3001\u5358\u4e00\u30da\u30fc\u30b8\u306e\u30c0\u30a6\u30f3\u30ed\u30fc\u30c9\u6a5f\u80fd\u306f\u63d0\u4f9b\u3057\u3066\u3044\u307e\u305b\u3093\u3002\u3064\u307e\u308a\u3001IA\u306eRESTful API\u3092\u4f7f\u3063\u3066\u7279\u5b9a\u306e\u30da\u30fc\u30b8\u3092\u53d6\u5f97\u3059\u308b\u3053\u3068\u306b\u306a\u308a\u307e\u3059\u3002\u307e\u305a\u3001\u5fc5\u8981\u306a\u30da\u30fc\u30b8\u3054\u3068\u306eURL\u3092\u69cb\u7bc9\u3057\u307e\u3059\u3002\u305d\u3057\u3066<font style=\"color:#ff0000\" class=\"has-text-color\">requests<\/font>\u30e9\u30a4\u30d6\u30e9\u30ea\u3092\u4f7f\u3063\u3066HTTP\u306e<font style=\"color:#ff0000\" class=\"has-text-color\">GET<\/font>\u30ea\u30af\u30a8\u30b9\u30c8\u3092\u9001\u308a\u3001\u3046\u307e\u304f\u3044\u3051\u3070\uff08\u3064\u307e\u308a\u30ec\u30b9\u30dd\u30f3\u30b9\u3067\u30b3\u30fc\u30c9200\u304c\u8fd4\u3063\u3066\u304f\u308b\u306a\u3069\u3059\u308c\u3070\uff09\u3001\u30ec\u30b9\u30dd\u30f3\u30b9\u306e\u5185\u5bb9\u3092JPEG\u30d5\u30a1\u30a4\u30eb\u306b\u66f8\u304d\u51fa\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<p>IA\u306f\u3001\u56fd\u969b\u7684\u306a\u753b\u50cf\u76f8\u4e92\u904b\u7528\u306e\u305f\u3081\u306e\u30d5\u30ec\u30fc\u30e0\u30ef\u30fc\u30af\u3067\u3042\u308b<a href=\"https:\/\/iiif.io\/\">IIIF<\/a>\uff08International Image Interoperability 
Framework\uff09\u306b\u6e96\u62e0\u3057\u305f\u753b\u50cf\u306e\u30c8\u30ea\u30df\u30f3\u30b0\u3068\u30b5\u30a4\u30ba\u5909\u66f4\u306e\u305f\u3081\u306eAPI\u306e<a href=\"https:\/\/iiif.archivelab.org\/iiif\/documentation\">\u30a2\u30eb\u30d5\u30a1\u7248<\/a>\u306b\u53d6\u308a\u7d44\u3093\u3067\u304d\u307e\u3057\u305f\u3002\u307b\u3068\u3093\u3069\u30b5\u30dd\u30fc\u30c8\u3055\u308c\u3066\u3044\u306a\u3044\u5f62\u5f0f\u3067\u3001JP2\u30d5\u30a1\u30a4\u30eb\u306e\u30c0\u30a6\u30f3\u30ed\u30fc\u30c9\u3092\u5fc5\u8981\u3068\u3057\u3066\u3044\u305f\u5358\u4e00\u30da\u30fc\u30b8\u30c0\u30a6\u30f3\u30ed\u30fc\u30c9\u306e\u53e4\u3044\u65b9\u6cd5\u3068\u6bd4\u3079\u3001IIIF\u306f\u5927\u304d\u306a\u6539\u5584\u3092\u3082\u305f\u3089\u3057\u307e\u3057\u305f\u3002\u4eca\u3067\u306f\u3001\u30b7\u30f3\u30b0\u30eb\u30da\u30fc\u30b8\u306eJPEG\u3092\u53d6\u5f97\u3059\u308b\u306e\u306f\u975e\u5e38\u306b\u7c21\u5358\u3067\u3059\u3002<\/p>\n\n\n\n<pre class=\"wp-block-preformatted\">  # <a href=\"https:\/\/iiif.archivelab.org\/iiif\/documentation\">https:\/\/iiif.archivelab.org\/iiif\/documentation<\/a>\u3092\u898b\u3088\n urls = [\"https:\/\/iiif.archivelab.org\/iiif\/{}${}\/full\/full\/0\/default.jpg\".format(item_id, page)\n &nbsp;&nbsp;&nbsp; for page in img_pages]\n &nbsp;\n # python\u30e9\u30a4\u30d6\u30e9\u30ea\u3092\u4ecb\u3057\u3066\u76f4\u63a5\u30da\u30fc\u30b8\u3092\u30c0\u30a6\u30f3\u30ed\u30fc\u30c9\u305b\u305a\u3001GET\u30ea\u30af\u30a8\u30b9\u30c8\u3092\u69cb\u7bc9\u3059\u308b\n for i, page, url in zip(range(1,total_pages), img_pages, urls):\n &nbsp;\n &nbsp;&nbsp;&nbsp; rsp = requests.get(url, allow_redirects=True)\n &nbsp;\n &nbsp;&nbsp;&nbsp; if rsp.status_code == 200:\n &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; print(\"[{}] Downloading page {} ({}\/{})\".format(item_id, \\\n &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; page, i+1, total_pages))\n &nbsp;\n 
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; with open(os.path.join(out_dir, str(page) + \".jpg\"), \"wb\") as fp:\n &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; fp.write(rsp.content) <\/pre>\n\n\n\n<h2 class=\"wp-block-heading\" id=\"next-steps\">\u3053\u306e\u5f8c\u306e\u30b9\u30c6\u30c3\u30d7<\/h2>\n\n\n\n<p>\u30ce\u30fc\u30c8\u30d6\u30c3\u30af\u306e\u4e3b\u8981\u306a\u95a2\u6570\u3068\u30c7\u30fc\u30bf\u3092\u53d6\u308a\u51fa\u3059\u305f\u3081\u306e\u30b3\u30fc\u30c9\u3092\u7406\u89e3\u3057\u305f\u3089\u3001\u30bb\u30eb\u3092\u9806\u756a\u306b\u5b9f\u884c\u3057\u305f\u308a\u3001\u3042\u308b\u3044\u306f\u300cRun\nAll\u300d\u3092\u3057\u305f\u308a\u3057\u3066\u3001\u633f\u7d75\u30da\u30fc\u30b8\u304c\u5165\u3063\u3066\u304f\u308b\u306e\u3092\u81ea\u7531\u306b\u898b\u3066\u307f\u307e\u3057\u3087\u3046\u3002\u3053\u308c\u3089\u306e\u30b9\u30af\u30ea\u30d7\u30c8\u3084\u95a2\u6570\u306f\u3001\u3042\u306a\u305f\u81ea\u8eab\u306e\u7814\u7a76\u8ab2\u984c\u306b\u5408\u308f\u305b\u3066\u6d3b\u7528\u3059\u308b\u3053\u3068\u3092\u304a\u52e7\u3081\u3057\u307e\u3059\u3002<\/p>\n\n\n\n<hr class=\"wp-block-separator\"\/>\n\n\n\n<p><strong>\u8457\u8005\u306b\u3064\u3044\u3066<\/strong><strong><\/strong><\/p>\n\n\n\n<p>Stephen 
Krewson\u306f\u30a4\u30a7\u30fc\u30eb\u5927\u5b66\u306e\u82f1\u6587\u5b66\u535a\u58eb\u8ab2\u7a0b\u306b\u5728\u7c4d\u3057\u300119\u4e16\u7d00\u521d\u671f\u306e\u9032\u6b69\u4e3b\u7fa9\u7684\u6559\u80b2\u7406\u8ad6\u3068\u5370\u5237\u30e1\u30c7\u30a3\u30a2\u306e\u9593\u306e\u76f8\u4e92\u4f5c\u7528\u306b\u3064\u3044\u3066\u7814\u7a76\u3092\u3057\u3066\u3044\u307e\u3059\u3002\u5f7c\u306f\u30b3\u30f3\u30d4\u30e5\u30fc\u30bf\u30b5\u30a4\u30a8\u30f3\u30b9\u306e\u4fee\u58eb\u53f7\u3092\u6301\u3061\uff08\u540c\u3058\u304f\u30a4\u30a7\u30fc\u30eb\u5927\uff09\u3001\u5927\u898f\u6a21\u306a\u96fb\u5b50\u56f3\u66f8\u9928\u306b\u304a\u3051\u308b\u30d5\u30a3\u30eb\u30bf\u30ea\u30f3\u30b0\u3084\u691c\u7d22\u4f5c\u696d\u306e\u52b9\u7387\u5316\u624b\u6cd5\u306e\u958b\u767a\u3082\u884c\u3063\u3066\u3044\u307e\u3059\u3002<\/p>\n\n\n\n<hr class=\"wp-block-separator\"\/>\n\n\n\n<h5 class=\"wp-block-heading\">\u5f15\u7528\u306e\u969b\u306f\u3053\u3061\u3089\u3092\u3054\u5229\u7528\u304f\u3060\u3055\u3044<\/h5>\n\n\n\n<p>\uff1c\u539f\u8457\uff1e<br>Stephen Krewson, &#8220;Extracting Illustrated Pages from Digital Libraries with Python,&#8221; <em>The Programming Historian<\/em> 8 (2019), <a href=\"https:\/\/programminghistorian.org\/en\/lessons\/extracting-illustrated-pages\">https:\/\/programminghistorian.org\/en\/lessons\/extracting-illustrated-pages<\/a>.<\/p>\n\n\n\n<p>\uff1c\u7ffb\u8a33\u8a18\u4e8b\uff1e<br>Stephen Krewson\u8457, \u83ca\u6c60\u4fe1\u5f66\u8a33. Python\u3092\u4f7f\u3063\u3066\u96fb\u5b50\u56f3\u66f8\u9928\u304b\u3089\u633f\u7d75\u306e\u30da\u30fc\u30b8\u3092\u62bd\u51fa\u3059\u308b. \u6771\u30a2\u30b8\u30a2DH\u30dd\u30fc\u30bf\u30eb. 2020. 
<a href=\"https:\/\/www.dh.ku-orcas.kansai-u.ac.jp\/?p=390\">https:\/\/www.dh.ku-orcas.kansai-u.ac.jp\/?p=390<\/a>.<\/p>\n\n\n\n<a rel=\"license\" href=\"http:\/\/creativecommons.org\/licenses\/by\/4.0\/\"><img decoding=\"async\" alt=\"\u30af\u30ea\u30a8\u30a4\u30c6\u30a3\u30d6\u30fb\u30b3\u30e2\u30f3\u30ba\u30fb\u30e9\u30a4\u30bb\u30f3\u30b9\" style=\"border-width:0\" src=\"https:\/\/i.creativecommons.org\/l\/by\/4.0\/88x31.png\"><\/a><br>\u3053\u306e \u4f5c\u54c1 \u306f <a rel=\"license\" href=\"http:\/\/creativecommons.org\/licenses\/by\/4.0\/\">\u30af\u30ea\u30a8\u30a4\u30c6\u30a3\u30d6\u30fb\u30b3\u30e2\u30f3\u30ba \u8868\u793a 4.0 \u56fd\u969b \u30e9\u30a4\u30bb\u30f3\u30b9<\/a>\u306e\u4e0b\u306b\u63d0\u4f9b\u3055\u308c\u3066\u3044\u307e\u3059\u3002\n\n\n\n<hr class=\"wp-block-separator\"\/>\n\n\n\n<div class=\"fb-comments\" data-href=\"https:\/\/www.dh.ku-orcas.kansai-u.ac.jp\/?p=390\" data-numposts=\"5\" data-width=\"100%\"><\/div>\n","protected":false},"excerpt":{"rendered":"<p>Stephen Krewson HathiTrust\u3068Internet 
Arch<\/p>\n","protected":false},"author":1,"featured_media":392,"comment_status":"closed","ping_status":"closed","sticky":false,"template":"","format":"standard","meta":[],"categories":[2],"tags":[13,14,20],"_links":{"self":[{"href":"https:\/\/www.dh.ku-orcas.kansai-u.ac.jp\/index.php?rest_route=\/wp\/v2\/posts\/390"}],"collection":[{"href":"https:\/\/www.dh.ku-orcas.kansai-u.ac.jp\/index.php?rest_route=\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/www.dh.ku-orcas.kansai-u.ac.jp\/index.php?rest_route=\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/www.dh.ku-orcas.kansai-u.ac.jp\/index.php?rest_route=\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/www.dh.ku-orcas.kansai-u.ac.jp\/index.php?rest_route=%2Fwp%2Fv2%2Fcomments&post=390"}],"version-history":[{"count":25,"href":"https:\/\/www.dh.ku-orcas.kansai-u.ac.jp\/index.php?rest_route=\/wp\/v2\/posts\/390\/revisions"}],"predecessor-version":[{"id":786,"href":"https:\/\/www.dh.ku-orcas.kansai-u.ac.jp\/index.php?rest_route=\/wp\/v2\/posts\/390\/revisions\/786"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/www.dh.ku-orcas.kansai-u.ac.jp\/index.php?rest_route=\/wp\/v2\/media\/392"}],"wp:attachment":[{"href":"https:\/\/www.dh.ku-orcas.kansai-u.ac.jp\/index.php?rest_route=%2Fwp%2Fv2%2Fmedia&parent=390"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/www.dh.ku-orcas.kansai-u.ac.jp\/index.php?rest_route=%2Fwp%2Fv2%2Fcategories&post=390"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/www.dh.ku-orcas.kansai-u.ac.jp\/index.php?rest_route=%2Fwp%2Fv2%2Ftags&post=390"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}